Add type hints; make the custom page quotient breach function private, since the intention of outsourcing it from build_image_info is to make it testable separately

This commit is contained in:
Julius Unverfehrt 2022-09-12 09:52:33 +02:00
parent 04aee4e627
commit 101b71726c
3 changed files with 6 additions and 6 deletions

View File

@ -5,7 +5,7 @@ import traceback
from functools import partial, lru_cache from functools import partial, lru_cache
from itertools import chain, starmap, filterfalse from itertools import chain, starmap, filterfalse
from operator import itemgetter, truth from operator import itemgetter, truth
from typing import List from typing import List, Iterable, Iterator
import fitz import fitz
from PIL import Image from PIL import Image
@ -59,7 +59,7 @@ class ParsablePDFImageExtractor(ImageExtractor):
yield from image_metadata_pairs yield from image_metadata_pairs
@staticmethod @staticmethod
def __filter_valid_images(image_metadata_pairs): def __filter_valid_images(image_metadata_pairs: Iterable[ImageMetadataPair]) -> Iterator[ImageMetadataPair]:
def validate(image: Image.Image, metadata: dict): def validate(image: Image.Image, metadata: dict):
try: try:
# TODO: stand-in heuristic for testing if image is valid => find cleaner solution (RED-5148) # TODO: stand-in heuristic for testing if image is valid => find cleaner solution (RED-5148)

View File

@ -27,7 +27,7 @@ def build_image_info(data: dict) -> dict:
quotient = round(compute_geometric_quotient(), 4) quotient = round(compute_geometric_quotient(), 4)
min_image_to_page_quotient_breached = bool(quotient < CONFIG.filters.image_to_page_quotient.min) min_image_to_page_quotient_breached = bool(quotient < CONFIG.filters.image_to_page_quotient.min)
max_image_to_page_quotient_breached = is_max_image_to_page_quotient_breached( max_image_to_page_quotient_breached = __is_max_image_to_page_quotient_breached(
quotient, data["classification"]["label"] quotient, data["classification"]["label"]
) )
min_image_width_to_height_quotient_breached = bool( min_image_width_to_height_quotient_breached = bool(
@ -77,7 +77,7 @@ def build_image_info(data: dict) -> dict:
return image_info return image_info
def is_max_image_to_page_quotient_breached(quotient, label): def __is_max_image_to_page_quotient_breached(quotient: float, label: str) -> bool:
default_max_quotient = CONFIG.filters.image_to_page_quotient.max default_max_quotient = CONFIG.filters.image_to_page_quotient.max
customized_entries = CONFIG.filters.image_to_page_quotient.customized.max customized_entries = CONFIG.filters.image_to_page_quotient.customized.max
max_quotient = customized_entries.get(label, default_max_quotient) max_quotient = customized_entries.get(label, default_max_quotient)

View File

@ -1,6 +1,6 @@
import pytest import pytest
from image_prediction.transformer.transformers.response import is_max_image_to_page_quotient_breached from image_prediction.transformer.transformers.response import __is_max_image_to_page_quotient_breached
@pytest.fixture @pytest.fixture
@ -18,4 +18,4 @@ def expected_is_breached(quotient, label):
@pytest.mark.parametrize("quotient", [0.1, 0.5]) @pytest.mark.parametrize("quotient", [0.1, 0.5])
@pytest.mark.parametrize("label", ["logo", "signature"]) @pytest.mark.parametrize("label", ["logo", "signature"])
def test_customized_per_label_ratio_breach(quotient, label, expected_is_breached): def test_customized_per_label_ratio_breach(quotient, label, expected_is_breached):
assert is_max_image_to_page_quotient_breached(quotient, label) == expected_is_breached assert __is_max_image_to_page_quotient_breached(quotient, label) == expected_is_breached