refactor scanned page filtering WIP

This commit is contained in:
Julius Unverfehrt 2023-02-01 15:16:12 +01:00
parent 0f440bdb09
commit c55777e339
2 changed files with 13 additions and 11 deletions

View File

@ -2,6 +2,7 @@ import atexit
import io
import json
import traceback
from _operator import itemgetter
from functools import partial, lru_cache
from itertools import chain, starmap, filterfalse
from operator import itemgetter, truth
@ -11,15 +12,16 @@ import fitz
from PIL import Image
from funcy import merge, pluck, curry, compose, rcompose, remove
from image_prediction.config import CONFIG
from image_prediction.formatter.formatters.enum import EnumFormatter
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
from image_prediction.image_extractor.filters import (
filter_metadata_for_scanned_pages,
__breaches_image_to_page_quotient,
)
from image_prediction.info import Info
from image_prediction.stitching.stitching import stitch_pairs
from image_prediction.stitching.utils import validate_box_coords, validate_box_size
from image_prediction.transformer.transformers.response import compute_geometric_quotient
from image_prediction.utils import get_logger
from image_prediction.utils.generic import lift
@ -218,3 +220,12 @@ def clear_caches():
# Register clear_caches with atexit so it is invoked at normal interpreter termination.
atexit.register(clear_caches)
def __breaches_image_to_page_quotient(metadatum):
    """Tell whether an image's geometric quotient exceeds the configured maximum.

    Args:
        metadatum: Object indexable by the ``Info`` keys ``PAGE_WIDTH``,
            ``PAGE_HEIGHT``, ``X1``, ``X2``, ``Y1`` and ``Y2``.

    Returns:
        ``True`` if the value returned by ``compute_geometric_quotient`` is
        greater than ``CONFIG.filters.image_to_page_quotient.max``,
        ``False`` otherwise.

    Raises:
        Whatever ``metadatum[key]`` raises when one of the keys above is
        missing (``KeyError`` for a plain ``dict``).
    """
    # Only the page size and the box corners feed the quotient, so
    # Info.WIDTH / Info.HEIGHT are deliberately not looked up here.
    page_width, page_height, x1, x2, y1, y2 = itemgetter(
        Info.PAGE_WIDTH, Info.PAGE_HEIGHT, Info.X1, Info.X2, Info.Y1, Info.Y2
    )(metadatum)

    # NOTE(review): the (x2, x1, y2, y1) argument order is kept exactly as
    # written; confirm it against compute_geometric_quotient's signature.
    geometric_quotient = compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1)

    # bool() normalizes the comparison result to a plain Python bool.
    return bool(geometric_quotient > CONFIG.filters.image_to_page_quotient.max)

View File

@ -1,11 +1,9 @@
from _operator import itemgetter
from typing import List
from funcy import first, second
from image_prediction.config import CONFIG
from image_prediction.image_extractor.extractors.parsable import __breaches_image_to_page_quotient
from image_prediction.info import Info
from image_prediction.transformer.transformers.response import compute_geometric_quotient
from image_prediction.utils import get_logger
logger = get_logger()
@ -23,10 +21,3 @@ def is_metadata_of_a_scanned_page(metadata):
return first(map(__breaches_image_to_page_quotient, metadata)) and not second(metadata)
def __breaches_image_to_page_quotient(metadatum):
    """Check a single image metadatum against the image-to-page quotient limit.

    Args:
        metadatum: Object supporting item access with the ``Info`` keys
            ``PAGE_WIDTH``, ``PAGE_HEIGHT``, ``X1``, ``X2``, ``Y1``, ``Y2``.

    Returns:
        A plain ``bool``: ``True`` when ``compute_geometric_quotient`` yields
        a value strictly greater than
        ``CONFIG.filters.image_to_page_quotient.max``, else ``False``.

    Raises:
        The lookup error of ``metadatum`` (``KeyError`` for a ``dict``) if
        any of the listed keys is absent.
    """
    # Info.WIDTH and Info.HEIGHT are not needed for the quotient; fetching
    # them would only make the check fail on metadata that lacks them.
    read_geometry = itemgetter(
        Info.PAGE_WIDTH, Info.PAGE_HEIGHT, Info.X1, Info.X2, Info.Y1, Info.Y2
    )
    page_width, page_height, x1, x2, y1, y2 = read_geometry(metadatum)

    # NOTE(review): arguments are passed as (x2, x1, y2, y1), unchanged from
    # the original call; verify against compute_geometric_quotient.
    geometric_quotient = compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1)
    quotient_limit = CONFIG.filters.image_to_page_quotient.max

    # Coerce the comparison result to a built-in bool before returning.
    return bool(geometric_quotient > quotient_limit)