Make test for bad xref work

This commit is contained in:
Matthias Bisping 2023-02-06 12:18:25 +01:00
parent 825099d946
commit 0244ba7f17
2 changed files with 4 additions and 4 deletions

View File

@ -1,9 +1,9 @@
webserver: webserver:
host: $SERVER_HOST|"127.0.0.1" # webserver address host: $SERVER_HOST|"127.0.0.1" # Webserver address
port: $SERVER_PORT|5000 # webserver port port: $SERVER_PORT|5000 # Webserver port
service: service:
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
verbose: $VERBOSE|True # Service prints document processing progress to stdout verbose: $VERBOSE|True # Service prints document processing progress to stdout
batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from

View File

@ -47,7 +47,7 @@ class ParsablePDFImageExtractor(ImageExtractor):
def __process_images_on_page(self, page: fitz.fitz.Page): def __process_images_on_page(self, page: fitz.fitz.Page):
metadata = extract_valid_metadata(self.doc, page) metadata = extract_valid_metadata(self.doc, page)
image_metadata_pairs = map(partial(metadatum_to_image_metadata_pair, self.doc), metadata) image_metadata_pairs = keep(partial(metadatum_to_image_metadata_pair, self.doc), metadata)
clear_caches() clear_caches()
# TODO: In the future, consider introducing an image validator as a pipeline component rather than doing the # TODO: In the future, consider introducing an image validator as a pipeline component rather than doing the