chore: add script for local and container debug

2024-08-30 10:28:33 +02:00 · 2024-08-30 10:28:33 +02:00 · c852434b75
commit c852434b75
parent 8655e25ec0
2 changed files with 46 additions and 1 deletions
--- a/scripts/debug/debug.py
+++ b/scripts/debug/debug.py
@ -0,0 +1,45 @@
 import json
 import os
 from kn_utils.logging import logger
 from image_prediction.config import CONFIG
 from image_prediction.pipeline import load_pipeline
 from image_prediction.utils.pdf_annotation import annotate_pdf
 import numpy as np
 import random
 import tensorflow as tf
 def process_pdf(pipeline, pdf_path, page_range=None):
    """Run ``pipeline`` on the raw bytes of a PDF file and collect the results.

    Args:
        pipeline: Callable invoked as ``pipeline(pdf_bytes, page_range=...)``
            whose result is iterated to completion.
        pdf_path: Path of the PDF file to read in binary mode.
        page_range: Forwarded unchanged to ``pipeline``; defaults to ``None``.

    Returns:
        A list with every item produced by the pipeline.
    """
    with open(pdf_path, "rb") as pdf_file:
        # Logged only once the file has been opened successfully.
        logger.info(f"Processing {pdf_path}")
        pdf_bytes = pdf_file.read()
        # Materialise the pipeline output before leaving the ``with`` block.
        return list(pipeline(pdf_bytes, page_range=page_range))
 def ensure_seeds(seed=42):
    """Seed the NumPy, Python ``random`` and TensorFlow random generators.

    All three generators receive the same value so that a debug run can be
    repeated with identical seeding.

    Args:
        seed: Integer seed handed to each generator. Defaults to 42, the
            value that was previously hard-coded.
    """
    np.random.seed(seed)
    random.seed(seed)
    # NOTE(review): this sets TensorFlow's global seed only; ops that take
    # their own op-level seed may still need one for full determinism.
    tf.random.set_seed(seed)
 def debug_info():
    """Print the physical devices reported by TensorFlow and this script's directory."""
    print("Available devices:", tf.config.list_physical_devices())

    # Directory containing this file, resolved to an absolute path.
    script_location = os.path.abspath(__file__)
    print(os.path.dirname(script_location))
 if __name__ == "__main__":
    # PDF to debug. Adjust this to the corresponding path when running inside
    # the docker container.
    debug_file_path = "./src/debug.pdf"

    # Representation hash of the image with the wrong prediction mentioned in
    # RED-9948. Its prediction should be inconclusive, and its "all passed"
    # flag should be false.
    target_representation = "FA30F080F0C031CE17E8CF237"

    # Seed the random generators before the pipeline is loaded.
    ensure_seeds()
    debug_info()

    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    all_predictions = process_pdf(pipeline, debug_file_path)

    # Keep only the predictions that belong to the image under investigation.
    matching_predictions = [
        prediction
        for prediction in all_predictions
        if prediction["representation"] == target_representation
    ]
    print(json.dumps(matching_predictions, indent=2))
--- a/test/unit_tests/encoder_test.py
+++ b/test/unit_tests/encoder_test.py
@ -36,7 +36,7 @@ def test_hash_encoder(images, hashed_images, base_patch_image):
 def test_all_hashes_have_length_of_twentyfive():
    """See RED-3814: all hashes should have 25 characters."""
-    pdf_path = Path(__file__).parents[1] / "data" / "similarImages2.pdf"
+    pdf_path = Path(__file__).parents[1] / "data" / "RED-3814" / "similarImages2.pdf"
    pdf_bytes = pdf_path.read_bytes()
    image_extractor = ParsablePDFImageExtractor()
    image_metadata_pairs = list(image_extractor.extract(pdf_bytes))