"""Script to debug RED-9948.

The predictions unexpectedly changed for some images, and we need to
understand why.
"""

import json
import random
from pathlib import Path

import numpy as np
import tensorflow as tf
from kn_utils.logging import logger

from image_prediction.config import CONFIG
from image_prediction.pipeline import load_pipeline


def process_pdf(pipeline, pdf_path, page_range=None) -> list:
    """Run ``pipeline`` on the bytes of the file at ``pdf_path``.

    Args:
        pipeline: Callable invoked as ``pipeline(data, page_range=...)``; its
            result is iterated and collected.
        pdf_path: Path of the file to read in binary mode.
        page_range: Forwarded unchanged to ``pipeline``.

    Returns:
        A list with every item produced by ``pipeline``.
    """
    # Log before opening so a failing open() can still be tied to a path.
    logger.info(f"Processing {pdf_path}")
    with open(pdf_path, "rb") as f:
        data = f.read()
    # Materialise the result so the caller gets a list it can filter and dump.
    return list(pipeline(data, page_range=page_range))


def ensure_seeds(seed: int = 42) -> None:
    """Seed the NumPy, stdlib ``random`` and TensorFlow random generators.

    Args:
        seed: Value handed to all three generators. Defaults to 42.
    """
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)


def debug_info() -> None:
    """Print the physical devices reported by TensorFlow."""
    devices = tf.config.list_physical_devices()
    print("Available devices:", devices)


if __name__ == "__main__":
    # For in container debugging, copy the file and adjust the path.
    debug_file_path = Path(__file__).parents[2] / "test" / "data" / "RED-9948" / "SYNGENTA_EFSA_sanitisation_GFL_v2"
    ensure_seeds()
    debug_info()

    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    predictions = process_pdf(pipeline, debug_file_path)

    # This is the image that has the wrong prediction mentioned in RED-9948.
    # The predictions should be inconclusive, and the flag all passed should
    # be false.
    predictions = [x for x in predictions if x["representation"] == "FA30F080F0C031CE17E8CF237"]
    print(json.dumps(predictions, indent=2))