improve the readability of variable names and docstrings

2023-02-01 10:08:36 +01:00 · 2023-02-01 10:08:36 +01:00 · 876260f403
commit 876260f403
parent 368c54a8be
2 changed files with 5 additions and 5 deletions
--- a/image_prediction/image_extractor/extractors/parsable.py
+++ b/image_prediction/image_extractor/extractors/parsable.py
@@ -47,7 +47,7 @@ class ParsablePDFImageExtractor(ImageExtractor):

    # FIXME: Heuristic filtering shouldn't take place here,
    #  consider introducing a preprocessing step before extracting images,
-    #  e.g. together with a image validation step for broken images.
+    #  e.g. together with an image validation step for broken images.
    @filter_scanned_pages
    def __process_images_on_page(self, page: fitz.fitz.Page):
        images = get_images_on_page(self.doc, page)
--- a/image_prediction/image_extractor/filters.py
+++ b/image_prediction/image_extractor/filters.py
@@ -16,17 +16,17 @@ def filter_scanned_pages(page_processor: Callable):
    """Decorator for the __process_images_on_page method of the ParsablePDFImageExtractor.
    This makes it so that scanned pages won't be processed (and are thus ultimately removed from the pipline).
    A scanned page is defined by
-        - having only one image on a page
+        - having only one image on a page and
        - that image having an image_to_page ratio greater than the allowed max value
-          found in the CONFIG.filters.image_to_page_quotient.max"""
+          defined in CONFIG.filters.image_to_page_quotient.max"""

-    def inner(self: ImageExtractor, page: fitz.fitz.Page):
+    def inner(extractor: ImageExtractor, page: fitz.fitz.Page):
        metadata = get_metadata(page)
        if is_a_scanned_page(metadata):
            logger.debug(f"Page {page.number} won't be processed since it is a scanned page.")
            yield from []
        else:
-            yield from page_processor(self, page)
+            yield from page_processor(extractor, page)

    logger.info(f"Extracting pages with filtering scanned pages...")
    return inner