From c55777e3394c98a41deba7af406638f034a0777c Mon Sep 17 00:00:00 2001 From: Julius Unverfehrt Date: Wed, 1 Feb 2023 15:16:12 +0100 Subject: [PATCH] refactor scanned page filtering WIP --- .../image_extractor/extractors/parsable.py | 13 ++++++++++++- image_prediction/image_extractor/filters.py | 11 +---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index 4f80723..0af1f10 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -2,6 +2,7 @@ import atexit import io import json import traceback +from _operator import itemgetter from functools import partial, lru_cache from itertools import chain, starmap, filterfalse from operator import itemgetter, truth @@ -11,15 +12,16 @@ import fitz from PIL import Image from funcy import merge, pluck, curry, compose, rcompose, remove +from image_prediction.config import CONFIG from image_prediction.formatter.formatters.enum import EnumFormatter from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair from image_prediction.image_extractor.filters import ( filter_metadata_for_scanned_pages, - __breaches_image_to_page_quotient, ) from image_prediction.info import Info from image_prediction.stitching.stitching import stitch_pairs from image_prediction.stitching.utils import validate_box_coords, validate_box_size +from image_prediction.transformer.transformers.response import compute_geometric_quotient from image_prediction.utils import get_logger from image_prediction.utils.generic import lift @@ -218,3 +220,12 @@ def clear_caches(): atexit.register(clear_caches) + + +def __breaches_image_to_page_quotient(metadatum): + page_width, page_height, x1, x2, y1, y2, width, height = itemgetter( + Info.PAGE_WIDTH, Info.PAGE_HEIGHT, Info.X1, Info.X2, Info.Y1, Info.Y2, Info.WIDTH, Info.HEIGHT + )(metadatum) + geometric_quotient = compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1) + quotient_breached = bool(geometric_quotient > CONFIG.filters.image_to_page_quotient.max) + return quotient_breached diff --git a/image_prediction/image_extractor/filters.py b/image_prediction/image_extractor/filters.py index e77b84b..13053de 100644 --- a/image_prediction/image_extractor/filters.py +++ b/image_prediction/image_extractor/filters.py @@ -1,11 +1,9 @@ -from _operator import itemgetter from typing import List from funcy import first, second -from image_prediction.config import CONFIG +from image_prediction.image_extractor.extractors.parsable import __breaches_image_to_page_quotient from image_prediction.info import Info -from image_prediction.transformer.transformers.response import compute_geometric_quotient from image_prediction.utils import get_logger logger = get_logger() @@ -23,10 +21,3 @@ def is_metadata_of_a_scanned_page(metadata): return first(map(__breaches_image_to_page_quotient, metadata)) and not second(metadata) -def __breaches_image_to_page_quotient(metadatum): - page_width, page_height, x1, x2, y1, y2, width, height = itemgetter( - Info.PAGE_WIDTH, Info.PAGE_HEIGHT, Info.X1, Info.X2, Info.Y1, Info.Y2, Info.WIDTH, Info.HEIGHT - )(metadatum) - geometric_quotient = compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1) - quotient_breached = bool(geometric_quotient > CONFIG.filters.image_to_page_quotient.max) - return quotient_breached