refactor scanned page filtering WIP
This commit is contained in:
parent
0f440bdb09
commit
c55777e339
@ -2,6 +2,7 @@ import atexit
|
||||
import io
|
||||
import json
|
||||
import traceback
|
||||
from _operator import itemgetter
|
||||
from functools import partial, lru_cache
|
||||
from itertools import chain, starmap, filterfalse
|
||||
from operator import itemgetter, truth
|
||||
@ -11,15 +12,16 @@ import fitz
|
||||
from PIL import Image
|
||||
from funcy import merge, pluck, curry, compose, rcompose, remove
|
||||
|
||||
from image_prediction.config import CONFIG
|
||||
from image_prediction.formatter.formatters.enum import EnumFormatter
|
||||
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
|
||||
from image_prediction.image_extractor.filters import (
|
||||
filter_metadata_for_scanned_pages,
|
||||
__breaches_image_to_page_quotient,
|
||||
)
|
||||
from image_prediction.info import Info
|
||||
from image_prediction.stitching.stitching import stitch_pairs
|
||||
from image_prediction.stitching.utils import validate_box_coords, validate_box_size
|
||||
from image_prediction.transformer.transformers.response import compute_geometric_quotient
|
||||
from image_prediction.utils import get_logger
|
||||
from image_prediction.utils.generic import lift
|
||||
|
||||
@ -218,3 +220,12 @@ def clear_caches():
|
||||
|
||||
|
||||
atexit.register(clear_caches)
|
||||
|
||||
|
||||
def __breaches_image_to_page_quotient(metadatum):
    """Report whether an image's geometric quotient exceeds the configured maximum.

    Args:
        metadatum: Mapping indexed by ``Info`` members; it must contain the
            page size, the four box coordinates, and the image width/height.

    Returns:
        ``True`` if the value returned by ``compute_geometric_quotient`` is
        strictly greater than ``CONFIG.filters.image_to_page_quotient.max``,
        otherwise ``False``.

    Raises:
        KeyError: Presumably, if ``metadatum`` is a dict lacking any of the
            required ``Info`` keys (depends on the mapping type passed in).
    """
    required_fields = (
        Info.PAGE_WIDTH,
        Info.PAGE_HEIGHT,
        Info.X1,
        Info.X2,
        Info.Y1,
        Info.Y2,
        Info.WIDTH,
        Info.HEIGHT,
    )
    # WIDTH and HEIGHT are fetched (so they must be present) but do not take
    # part in the quotient computation below.
    page_w, page_h, x_first, x_second, y_first, y_second, _img_w, _img_h = itemgetter(*required_fields)(metadatum)

    # The helper receives the second coordinate before the first on each axis.
    quotient = compute_geometric_quotient(page_w, page_h, x_second, x_first, y_second, y_first)
    upper_limit = CONFIG.filters.image_to_page_quotient.max
    return bool(quotient > upper_limit)
|
||||
|
||||
@ -1,11 +1,9 @@
|
||||
from _operator import itemgetter
|
||||
from typing import List
|
||||
|
||||
from funcy import first, second
|
||||
|
||||
from image_prediction.config import CONFIG
|
||||
from image_prediction.image_extractor.extractors.parsable import __breaches_image_to_page_quotient
|
||||
from image_prediction.info import Info
|
||||
from image_prediction.transformer.transformers.response import compute_geometric_quotient
|
||||
from image_prediction.utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
@ -23,10 +21,3 @@ def is_metadata_of_a_scanned_page(metadata):
|
||||
return first(map(__breaches_image_to_page_quotient, metadata)) and not second(metadata)
|
||||
|
||||
|
||||
def __breaches_image_to_page_quotient(metadatum):
    """Check an image's geometric quotient against the configured upper bound.

    Args:
        metadatum: Mapping indexed by ``Info`` members holding the page
            dimensions, the box coordinates, and the image width/height.

    Returns:
        ``True`` when ``compute_geometric_quotient`` yields a value strictly
        above ``CONFIG.filters.image_to_page_quotient.max``; ``False`` otherwise.
    """
    lookup = itemgetter(
        Info.PAGE_WIDTH,
        Info.PAGE_HEIGHT,
        Info.X1,
        Info.X2,
        Info.Y1,
        Info.Y2,
        Info.WIDTH,
        Info.HEIGHT,
    )
    # All eight entries are read; the last two (width, height) are unused here.
    page_w, page_h, x_a, x_b, y_a, y_b, _unused_w, _unused_h = lookup(metadatum)

    # Argument order mirrors the call convention used for the helper: x2, x1, y2, y1.
    quotient = compute_geometric_quotient(page_w, page_h, x_b, x_a, y_b, y_a)
    if quotient > CONFIG.filters.image_to_page_quotient.max:
        return True
    return False
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user