114 lines
4.1 KiB
Python
114 lines
4.1 KiB
Python
import json
|
|
import math
|
|
import os
|
|
from functools import lru_cache
|
|
from operator import itemgetter
|
|
|
|
from image_prediction.config import CONFIG
|
|
from image_prediction.exceptions import ParsingError
|
|
from image_prediction.transformer.transformer import Transformer
|
|
from image_prediction.utils import get_logger
|
|
from funcy import filter, juxt, first, rest
|
|
|
|
# Module-level logger used by the transformer and the env-var helpers below.
logger = get_logger()
|
|
|
|
|
|
class ResponseTransformer(Transformer):
    """Transformer whose output is the image-info dictionary for its input."""

    def transform(self, data):
        """Log the call at debug level and delegate to ``build_image_info``.

        Args:
            data: Mapping passed through unchanged to ``build_image_info``.

        Returns:
            Whatever ``build_image_info(data)`` returns.
        """
        logger.debug("ResponseTransformer.transform")
        image_info = build_image_info(data)
        return image_info
|
|
|
|
|
|
def build_image_info(data: dict) -> dict:
    """Assemble the response dictionary for one image, including filter flags.

    Reads "page_width", "page_height", "x1", "x2", "y1", "y2", "width",
    "height", "alpha", "page_idx", "classification" and "representation"
    from ``data`` and compares derived quotients against the thresholds in
    ``CONFIG.filters``.

    Args:
        data: Mapping holding the keys listed above. ``data["classification"]``
            must provide "label" and a "probabilities" mapping.

    Returns:
        A dict with the keys "classification", "representation", "position",
        "geometry", "alpha" and "filters". ``filters["allPassed"]`` is True
        only when none of the individual filter flags is set.
    """
    field_names = ("page_width", "page_height", "x1", "x2", "y1", "y2", "width", "height", "alpha")
    page_width, page_height, x1, x2, y1, y2, width, height, alpha = itemgetter(*field_names)(data)

    # Square roots of the areas: the size quotient relates lengths, not areas.
    page_side = math.sqrt(abs(page_width * page_height))
    image_side = math.sqrt(abs(x2 - x1) * abs(y2 - y1))
    size_quotient = round(image_side / page_side, 4)

    too_small = bool(size_quotient < CONFIG.filters.image_to_page_quotient.min)
    too_large = __is_max_image_to_page_quotient_breached(size_quotient, data["classification"]["label"])

    # Width-to-height ratio: below the minimum means "too tall", above the
    # maximum means "too wide".
    aspect = width / height
    format_limits = CONFIG.filters.image_width_to_height_quotient
    too_tall = bool(aspect < format_limits.min)
    too_wide = bool(aspect > format_limits.max)

    classification = data["classification"]
    representation = data["representation"]

    # The image is "unconfident" when even its highest class probability is
    # below the configured minimum.
    top_probability = max(classification["probabilities"].values())
    unconfident = bool(top_probability < CONFIG.filters.min_confidence)

    # "pageNumber" is the page index shifted by one.
    position = {"x1": x1, "x2": x2, "y1": y1, "y2": y2, "pageNumber": data["page_idx"] + 1}

    size_filter = {
        "quotient": size_quotient,
        "tooLarge": too_large,
        "tooSmall": too_small,
    }
    format_filter = {
        "quotient": round(aspect, 4),
        "tooTall": too_tall,
        "tooWide": too_wide,
    }
    breaches = (too_large, too_small, too_tall, too_wide, unconfident)

    return {
        "classification": classification,
        "representation": representation,
        "position": position,
        "geometry": {"width": width, "height": height},
        "alpha": alpha,
        "filters": {
            "geometry": {
                "imageSize": size_filter,
                "imageFormat": format_filter,
            },
            "probability": {"unconfident": unconfident},
            "allPassed": not any(breaches),
        },
    }
|
|
|
|
|
|
def __is_max_image_to_page_quotient_breached(quotient: float, label: str) -> bool:
    """Tell whether ``quotient`` exceeds the maximum configured for ``label``.

    The limit is looked up for ``label`` in
    ``CONFIG.filters.image_to_page_quotient.customized.max``. When the label
    has no entry, or its entry is falsy, the general
    ``CONFIG.filters.image_to_page_quotient.max`` applies instead.

    Args:
        quotient: Image-to-page quotient to check.
        label: Classification label used to select a customized limit.

    Returns:
        True when ``quotient`` is strictly greater than the applicable limit.
    """
    limits = CONFIG.filters.image_to_page_quotient
    general_limit = limits.max
    per_label_limits = limits.customized.max
    # `or` covers both a missing label and a falsy customized entry.
    limit = per_label_limits.get(label, general_limit) or general_limit
    return bool(quotient > limit)
|
|
|
|
|
|
@lru_cache(maxsize=None)
def parse_env_var(prefix, fallback_value):
    """Return the parsed value of the single environment variable matching ``prefix``.

    The names in ``os.environ`` are filtered with ``prefix``. When exactly one
    name matches, its value is parsed with ``parse_env_var_value`` and
    returned. In every other case (no match, several matches, or an
    unparsable value) ``fallback_value`` is returned.

    Results are memoized by ``lru_cache``, so both arguments must be hashable
    and a repeated call with the same arguments does not re-read the
    environment.

    Args:
        prefix: Pattern used to select environment variable names.
        fallback_value: Value returned when no unique, parsable variable exists.

    Returns:
        The parsed environment value, or ``fallback_value``.
    """
    # Materialise the matches before inspecting them. The previous
    # `juxt(first, rest)` form tested the truthiness of a lazy iterator, which
    # is always true, so the fallback was returned unconditionally.
    # `filter` is the funcy import from the top of the file.
    # NOTE(review): funcy documents string predicates as regex searches, so
    # the match is presumably not anchored to the start of the name - confirm
    # that this is intended.
    candidates = list(filter(prefix, os.environ))

    if not candidates:
        # An unset variable is not an ambiguity; do not report it as one.
        logger.debug(f"Found no environment variable with prefix '{prefix}', falling back to default value.")
        return fallback_value

    if len(candidates) > 1:
        logger.warning(
            f"Found multiple candidates for environment variable with prefix '{prefix}', falling back to default value."
        )
        return fallback_value

    (name,) = candidates
    try:
        return parse_env_var_value(os.environ[name])
    except ParsingError as err:
        logger.warning(f"{err}, falling back to default value.")
        return fallback_value
|
|
|
|
|
|
def parse_env_var_value(env_var_value):
    """Decode ``env_var_value`` as JSON and return the resulting object.

    Args:
        env_var_value: Text to hand to ``json.loads``.

    Returns:
        The decoded Python object.

    Raises:
        ParsingError: When ``json.loads`` raises any exception; the original
            exception is chained as the cause.
    """
    try:
        parsed = json.loads(env_var_value)
    except Exception as err:
        raise ParsingError(f"Failed to parse {env_var_value}") from err
    return parsed
|