From 75748a1d82f0ebdf3ad7d348c6d820c8858aa3cb Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Mon, 25 Apr 2022 11:19:26 +0200 Subject: [PATCH] refactoring --- .../image_extractor/extractors/parsable.py | 3 +-- image_prediction/pipeline.py | 12 +++++++++--- scripts/pyinfra_mock.py | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index 00d3d1d..a022396 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -17,7 +17,7 @@ from image_prediction.utils.generic import lift class ParsablePDFImageExtractor(ImageExtractor): - def __init__(self, verbose=False, tolerance=0, progress_message=None): + def __init__(self, verbose=False, tolerance=0): """ Args: @@ -27,7 +27,6 @@ class ParsablePDFImageExtractor(ImageExtractor): self.doc: fitz.fitz.Document = None self.verbose = verbose self.tolerance = tolerance - self.progress_message = progress_message def extract(self, pdf: bytes, page_range: range = None): self.doc = fitz.Document(stream=pdf) diff --git a/image_prediction/pipeline.py b/image_prediction/pipeline.py index 89785bc..a7accd8 100644 --- a/image_prediction/pipeline.py +++ b/image_prediction/pipeline.py @@ -17,7 +17,7 @@ def load_pipeline(**kwargs): model_loader = get_mlflow_model_loader(MLRUNS_DIR) model_identifier = CONFIG.service.run_id - pipeline = Pipeline(model_loader, model_identifier, progress_message="Processing document", **kwargs) + pipeline = Pipeline(model_loader, model_identifier, **kwargs) return pipeline @@ -31,7 +31,8 @@ def star(f): class Pipeline: - def __init__(self, model_loader, model_identifier, batch_size=16, **kwargs): + def __init__(self, model_loader, model_identifier, batch_size=16, verbose=True, **kwargs): + self.verbose = verbose extract = get_extractor(**kwargs) classifier = get_image_classifier(model_loader, model_identifier) @@ -55,4 +56,9 @@ class Pipeline: ) def __call__(self, pdf: bytes, page_range: range = None): - yield from tqdm(self.pipe(pdf, page_range=page_range), desc="Processing images from document", unit=" images") + yield from tqdm( + self.pipe(pdf, page_range=page_range), + desc="Processing images from document", + unit=" images", + disable=not self.verbose, + ) diff --git a/scripts/pyinfra_mock.py b/scripts/pyinfra_mock.py index fec12e9..07fddec 100644 --- a/scripts/pyinfra_mock.py +++ b/scripts/pyinfra_mock.py @@ -6,7 +6,7 @@ import requests def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--pdf_path", required=True) + parser.add_argument("pdf_path") args = parser.parse_args() return args