Make test for bad xref work
This commit is contained in:
parent
825099d946
commit
0244ba7f17
@ -1,9 +1,9 @@
|
||||
webserver:
|
||||
host: $SERVER_HOST|"127.0.0.1" # webserver address
|
||||
port: $SERVER_PORT|5000 # webserver port
|
||||
host: $SERVER_HOST|"127.0.0.1" # Webserver address
|
||||
port: $SERVER_PORT|5000 # Webserver port
|
||||
|
||||
service:
|
||||
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger
|
||||
logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
|
||||
verbose: $VERBOSE|True # Service prints document processing progress to stdout
|
||||
batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
|
||||
mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
|
||||
|
||||
@ -47,7 +47,7 @@ class ParsablePDFImageExtractor(ImageExtractor):
|
||||
|
||||
def __process_images_on_page(self, page: fitz.fitz.Page):
|
||||
metadata = extract_valid_metadata(self.doc, page)
|
||||
image_metadata_pairs = map(partial(metadatum_to_image_metadata_pair, self.doc), metadata)
|
||||
image_metadata_pairs = keep(partial(metadatum_to_image_metadata_pair, self.doc), metadata)
|
||||
clear_caches()
|
||||
|
||||
# TODO: In the future, consider introducing an image validator as a pipeline component rather than doing the
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user