diff --git a/scripts/debug/debug.py b/scripts/debug/debug.py
new file mode 100644
index 0000000..0e7704d
--- /dev/null
+++ b/scripts/debug/debug.py
@@ -0,0 +1,45 @@
+import json
+import os
+
+from kn_utils.logging import logger
+
+from image_prediction.config import CONFIG
+from image_prediction.pipeline import load_pipeline
+from image_prediction.utils.pdf_annotation import annotate_pdf
+import numpy as np
+import random
+import tensorflow as tf
+
+
+def process_pdf(pipeline, pdf_path, page_range=None):
+    with open(pdf_path, "rb") as f:
+        logger.info(f"Processing {pdf_path}")
+        predictions = list(pipeline(f.read(), page_range=page_range))
+
+    return predictions
+
+def ensure_seeds():
+    seed = 42
+    np.random.seed(seed)
+    random.seed(seed)
+    tf.random.set_seed(seed)
+
+def debug_info():
+    devices = tf.config.list_physical_devices()
+    print("Available devices:", devices)
+    current_path = os.path.dirname(os.path.abspath(__file__))
+    print(current_path)
+
+
+if __name__ == "__main__":
+    # Change this to the path of the PDF inside the docker container
+    debug_file_path = "./src/debug.pdf"
+    ensure_seeds()
+    debug_info()
+
+    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
+    predictions = process_pdf(pipeline, debug_file_path)
+    # This is the image that has the wrong prediction mentioned in RED-9948. The predictions should be inconclusive,
+    # and the "all passed" flag should be false.
+    predictions = [x for x in predictions if x["representation"] == "FA30F080F0C031CE17E8CF237"]
+    print(json.dumps(predictions, indent=2))
diff --git a/test/unit_tests/encoder_test.py b/test/unit_tests/encoder_test.py
index 5524197..edabbba 100644
--- a/test/unit_tests/encoder_test.py
+++ b/test/unit_tests/encoder_test.py
@@ -36,7 +36,7 @@ def test_hash_encoder(images, hashed_images, base_patch_image):
 
 def test_all_hashes_have_length_of_twentyfive():
     """See RED-3814: all hashes should have 25 characters."""
-    pdf_path = Path(__file__).parents[1] / "data" / "similarImages2.pdf"
+    pdf_path = Path(__file__).parents[1] / "data" / "RED-3814" / "similarImages2.pdf"
     pdf_bytes = pdf_path.read_bytes()
     image_extractor = ParsablePDFImageExtractor()
     image_metadata_pairs = list(image_extractor.extract(pdf_bytes))