Julius Unverfehrt c888453cc6 fix: pin dependencies to working versions
BREAKING CHANGE

Recent pyinfra changes update tensorflow implicitly (see RED-9948).
This can be fixed by pinning tensorflow and protobuf.
However this makes the service incompatible with the current pyinfra
versions.
2024-08-30 15:52:55 +02:00

47 lines
1.5 KiB
Python

"""Script to debug RED-9948. The predictions unexpectedly changed for some images, and we need to understand why."""
import json
import random
from pathlib import Path
import numpy as np
import tensorflow as tf
from kn_utils.logging import logger
from image_prediction.config import CONFIG
from image_prediction.pipeline import load_pipeline
def process_pdf(pipeline, pdf_path, page_range=None):
    """Run the prediction pipeline over one PDF file and collect its output.

    Args:
        pipeline: Callable invoked with the raw file bytes and a ``page_range``
            keyword argument; whatever it returns is iterated into a list.
        pdf_path: Location of the file to read in binary mode.
        page_range: Passed through unchanged to ``pipeline`` (``None`` by default).

    Returns:
        A list containing every item produced by the pipeline call.
    """
    with open(pdf_path, "rb") as pdf_file:
        logger.info(f"Processing {pdf_path}")
        pdf_bytes = pdf_file.read()
        results = pipeline(pdf_bytes, page_range=page_range)
        # list() drains the result in case the pipeline returns a lazy iterable.
        return list(results)
def ensure_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Calling this before running the pipeline makes the three random number
    sources start from the same state on every run, which helps when comparing
    predictions between runs.

    Args:
        seed: Integer seed applied to all three libraries. Defaults to 42, the
            value this script used before the seed became configurable.
    """
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
def debug_info():
    """Print the physical devices reported by ``tf.config.list_physical_devices()``."""
    print("Available devices:", tf.config.list_physical_devices())
if __name__ == "__main__":
    # For in-container debugging, copy the file and adjust the path below.
    base_dir = Path(__file__).parents[2]
    debug_file_path = base_dir / "test" / "data" / "RED-9948" / "SYNGENTA_EFSA_sanitisation_GFL_v2"

    # Fix the random seeds and report the available devices before loading the model.
    ensure_seeds()
    debug_info()

    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    all_predictions = process_pdf(pipeline, debug_file_path)

    # Representation of the image with the wrong prediction mentioned in RED-9948.
    # Its prediction should be inconclusive, and the "all passed" flag should be false.
    target_representation = "FA30F080F0C031CE17E8CF237"
    matching_predictions = []
    for prediction in all_predictions:
        if prediction["representation"] == target_representation:
            matching_predictions.append(prediction)

    print(json.dumps(matching_predictions, indent=2))