chore: add script for local and container debug
This commit is contained in:
parent
8655e25ec0
commit
c852434b75
45
scripts/debug/debug.py
Normal file
45
scripts/debug/debug.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
from kn_utils.logging import logger
|
||||||
|
|
||||||
|
from image_prediction.config import CONFIG
|
||||||
|
from image_prediction.pipeline import load_pipeline
|
||||||
|
from image_prediction.utils.pdf_annotation import annotate_pdf
|
||||||
|
import numpy as np
|
||||||
|
import random
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
|
def process_pdf(pipeline, pdf_path, page_range=None):
    """Feed the raw bytes of a PDF file to ``pipeline`` and collect its output.

    Args:
        pipeline: Callable invoked as ``pipeline(pdf_bytes, page_range=...)``;
            its result is iterated and materialized into a list.
        pdf_path: Path of the PDF file; it is opened in binary mode.
        page_range: Forwarded unchanged to ``pipeline`` (defaults to ``None``).

    Returns:
        A list holding every item produced by ``pipeline``.
    """
    with open(pdf_path, "rb") as pdf_file:
        logger.info(f"Processing {pdf_path}")
        pdf_bytes = pdf_file.read()
        # Materialize the results while still inside the ``with`` block.
        return list(pipeline(pdf_bytes, page_range=page_range))
|
||||||
|
|
||||||
|
def ensure_seeds(seed=42):
    """Seed the NumPy, Python ``random`` and TensorFlow random generators.

    The same value is passed to ``np.random.seed``, ``random.seed`` and
    ``tf.random.set_seed``, so that repeated debug runs start from the same
    random state.

    Args:
        seed: Integer seed applied to all three libraries. Defaults to 42,
            the value this script previously hard-coded.
    """
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
|
||||||
|
|
||||||
|
def debug_info():
    """Print the devices TensorFlow reports and the directory of this script."""
    print("Available devices:", tf.config.list_physical_devices())
    # Absolute directory containing this file, handy for checking where the
    # script is being run from (local checkout vs. container).
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(script_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Change to path in docker container
    debug_file_path = "./src/debug.pdf"

    ensure_seeds()
    debug_info()

    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    predictions = process_pdf(pipeline, debug_file_path)

    # This is the image that has the wrong prediction mentioned in RED-9948.
    # The prediction should be inconclusive, and the "all passed" flag should
    # be false.
    target_representation = "FA30F080F0C031CE17E8CF237"
    predictions = [
        prediction
        for prediction in predictions
        if prediction["representation"] == target_representation
    ]

    print(json.dumps(predictions, indent=2))
|
||||||
@ -36,7 +36,7 @@ def test_hash_encoder(images, hashed_images, base_patch_image):
|
|||||||
|
|
||||||
def test_all_hashes_have_length_of_twentyfive():
|
def test_all_hashes_have_length_of_twentyfive():
|
||||||
"""See RED-3814: all hashes should have 25 characters."""
|
"""See RED-3814: all hashes should have 25 characters."""
|
||||||
pdf_path = Path(__file__).parents[1] / "data" / "similarImages2.pdf"
|
pdf_path = Path(__file__).parents[1] / "data" / "RED-3814" / "similarImages2.pdf"
|
||||||
pdf_bytes = pdf_path.read_bytes()
|
pdf_bytes = pdf_path.read_bytes()
|
||||||
image_extractor = ParsablePDFImageExtractor()
|
image_extractor = ParsablePDFImageExtractor()
|
||||||
image_metadata_pairs = list(image_extractor.extract(pdf_bytes))
|
image_metadata_pairs = list(image_extractor.extract(pdf_bytes))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user