From 0244ba7f1749deeede8a0e003d2f2c9c0f975a4e Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Mon, 6 Feb 2023 12:18:25 +0100 Subject: [PATCH] Make test for bad xref work --- config.yaml | 6 +++--- image_prediction/image_extractor/extractors/parsable.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config.yaml b/config.yaml index d0f3c96..9bfcaf1 100644 --- a/config.yaml +++ b/config.yaml @@ -1,9 +1,9 @@ webserver: - host: $SERVER_HOST|"127.0.0.1" # webserver address - port: $SERVER_PORT|5000 # webserver port + host: $SERVER_HOST|"127.0.0.1" # Webserver address + port: $SERVER_PORT|5000 # Webserver port service: - logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger + logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger verbose: $VERBOSE|True # Service prints document processing progress to stdout batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index 102a9a4..efaf0b3 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -47,7 +47,7 @@ class ParsablePDFImageExtractor(ImageExtractor): def __process_images_on_page(self, page: fitz.fitz.Page): metadata = extract_valid_metadata(self.doc, page) - image_metadata_pairs = map(partial(metadatum_to_image_metadata_pair, self.doc), metadata) + image_metadata_pairs = keep(partial(metadatum_to_image_metadata_pair, self.doc), metadata) clear_caches() # TODO: In the future, consider to introduce an image validator as a pipeline component rather than doing the