Merge in RR/image-prediction from RED-5202-port-hotfixes to master
Squashed commit of the following:
commit aaa02ea35e9c1b3b307116d7e3e32c93fd79ef5d
Merge: 5d87066 521222e
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 15:28:39 2022 +0200
Merge branch 'master' of ssh://git.iqser.com:2222/rr/image-prediction into RED-5202-port-hotfixes
commit 5d87066b40b28f919b1346f5e5396b46445b4e00
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 15:25:01 2022 +0200
remove warning log for non existent non default env var
commit 23c61ef49ef918b29952150d4a6e61b99d60ac64
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 15:14:19 2022 +0200
make env var parser discrete
commit c1b92270354c764861da0f7782348e9cd0725d76
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Sep 12 13:28:44 2022 +0200
fixed statefulness issue with os.environ in tests
commit ad9c5657fe93079d5646ba2b70fa091e8d2daf76
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Sep 12 13:04:55 2022 +0200
- Adapted response formatting logic for threshold maps passed via env vars.
- Added test for reading threshold maps and values from env vars.
commit c60e8cd6781b8e0c3ec69ccd0a25375803de26f0
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 11:38:01 2022 +0200
add parser for environment variables WIP
commit 101b71726c697f30ec9298ba62d2203bd7da2efb
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 09:52:33 2022 +0200
Add type hints; make the custom page quotient breach function private, since the intention of outsourcing it from build_image_info is to make it testable separately
commit 04aee4e62781e78cd54c6d20e961dcd7bf1fc081
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 09:25:59 2022 +0200
DotIndexable default get method exception made more specific
commit 4584e7ba66400033dc5f1a38473b644eeb11e67c
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 08:55:05 2022 +0200
RED-5202 port temporary broken image handling so the hotfix won't be lost by upgrading the service. A proper solution is still desirable (see RED-5148)
commit 5f99622646b3f6d3a842aebef91ff8e082072cd6
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Sep 12 08:47:02 2022 +0200
RED-5202 add a per-class customizable max image-to-page quotient setting for signatures; the default is 0.4. It can be overwritten by [setting name missing in the original message]: set it to null to use the default value, or set it to the value that should be used.
149 lines
5.1 KiB
Python
149 lines
5.1 KiB
Python
import json
|
|
import math
|
|
import os
|
|
from functools import lru_cache
|
|
from operator import itemgetter
|
|
|
|
from funcy import first
|
|
|
|
from image_prediction.config import CONFIG
|
|
from image_prediction.exceptions import ParsingError
|
|
from image_prediction.transformer.transformer import Transformer
|
|
from image_prediction.utils import get_logger
|
|
|
|
# Module-level logger shared by every function in this file.
logger = get_logger()
|
|
|
|
|
|
class ResponseTransformer(Transformer):
    """Transformer that converts a prediction record into the image-info response."""

    def transform(self, data):
        """Emit a debug log entry and return ``build_image_info(data)``."""
        logger.debug("ResponseTransformer.transform")
        image_info = build_image_info(data)
        return image_info
|
|
|
|
|
|
def get_class_specific_min_image_to_page_quotient(label, table=None):
    """Return the lower image-to-page quotient bound for ``label``.

    Reads the ``"min"`` entry for ``label`` from the ``REL_IMAGE_SIZE``
    threshold map and uses ``CONFIG.filters.image_to_page_quotient.min``
    as the fallback value.
    """
    default_min = CONFIG.filters.image_to_page_quotient.min
    return get_class_specific_value(
        prefix="REL_IMAGE_SIZE", label=label, bound="min", fallback_value=default_min, table=table
    )
|
|
|
|
|
|
def get_class_specific_max_image_to_page_quotient(label, table=None):
    """Return the upper image-to-page quotient bound for ``label``.

    Reads the ``"max"`` entry for ``label`` from the ``REL_IMAGE_SIZE``
    threshold map and uses ``CONFIG.filters.image_to_page_quotient.max``
    as the fallback value.
    """
    default_max = CONFIG.filters.image_to_page_quotient.max
    return get_class_specific_value(
        prefix="REL_IMAGE_SIZE", label=label, bound="max", fallback_value=default_max, table=table
    )
|
|
|
|
|
|
def get_class_specific_min_image_width_to_height_quotient(label, table=None):
    """Return the lower width-to-height quotient bound for ``label``.

    Reads the ``"min"`` entry for ``label`` from the ``IMAGE_FORMAT``
    threshold map and uses
    ``CONFIG.filters.image_width_to_height_quotient.min`` as the fallback.
    """
    default_min = CONFIG.filters.image_width_to_height_quotient.min
    return get_class_specific_value(
        prefix="IMAGE_FORMAT", label=label, bound="min", fallback_value=default_min, table=table
    )
|
|
|
|
|
|
def get_class_specific_max_image_width_to_height_quotient(label, table=None):
    """Return the upper width-to-height quotient bound for ``label``.

    Reads the ``"max"`` entry for ``label`` from the ``IMAGE_FORMAT``
    threshold map and uses
    ``CONFIG.filters.image_width_to_height_quotient.max`` as the fallback.
    """
    default_max = CONFIG.filters.image_width_to_height_quotient.max
    return get_class_specific_value(
        prefix="IMAGE_FORMAT", label=label, bound="max", fallback_value=default_max, table=table
    )
|
|
|
|
|
|
def get_class_specific_min_classification_confidence(label, table=None):
    """Return the minimum classification confidence required for ``label``.

    Reads the ``"min"`` entry for ``label`` from the ``CONFIDENCE``
    threshold map and uses ``CONFIG.filters.min_confidence`` as the fallback.
    """
    default_confidence = CONFIG.filters.min_confidence
    return get_class_specific_value(
        prefix="CONFIDENCE", label=label, bound="min", fallback_value=default_confidence, table=table
    )
|
|
|
|
|
|
def get_class_specific_value(prefix, label, bound, fallback_value, table=None):
    """Resolve a per-class threshold, falling back to a default value.

    The threshold map is obtained from ``parse_env_var(prefix, table=table)``
    and is read as ``threshold_map[label][bound]``.

    Args:
        prefix: Name of the variable that holds the threshold map.
        label: Class label whose threshold is looked up.
        bound: Which bound to read; must be ``"min"`` or ``"max"``.
        fallback_value: Value returned when no threshold is configured for
            ``label``/``bound`` (variable absent, entry missing or ``null``,
            or the parsed value does not have the expected nested-dict shape).
        table: Optional mapping forwarded to ``parse_env_var``.

    Returns:
        The configured threshold, or ``fallback_value`` (after logging a
        warning) when none could be resolved.
    """
    assert bound in ["min", "max"]

    threshold_map = parse_env_var(prefix, table=table)

    # Guard every level: the JSON may be absent, may not be an object, or may
    # map the label to null; all of these mean "nothing configured".
    class_thresholds = threshold_map.get(label) if isinstance(threshold_map, dict) else None
    value = class_thresholds.get(bound) if isinstance(class_thresholds, dict) else None

    # Test for None explicitly: a plain truthiness check would discard a
    # deliberately configured threshold of 0 and replace it with the default.
    if value is None:
        logger.warning(f"Failed to resolve {bound} {prefix.lower().replace('_', '-')} value for class '{label}'.")
        return fallback_value

    return value
|
|
|
|
|
|
def build_image_info(data: dict) -> dict:
    """Assemble the response entry for a single classified image.

    Reads the page and image geometry, the classification and the
    representation from ``data``, evaluates the class-specific size, format
    and confidence filters for the predicted label, and returns everything
    as one nested dictionary.

    Args:
        data: Record providing the keys ``page_width``, ``page_height``,
            ``x1``, ``x2``, ``y1``, ``y2``, ``width``, ``height``, ``alpha``,
            ``classification`` (with ``label`` and ``probabilities``),
            ``representation`` and ``page_idx``.

    Returns:
        Dictionary with the keys ``classification``, ``representation``,
        ``position``, ``geometry``, ``alpha`` and ``filters``.
    """
    read_fields = itemgetter("page_width", "page_height", "x1", "x2", "y1", "y2", "width", "height", "alpha")
    page_width, page_height, x1, x2, y1, y2, width, height, alpha = read_fields(data)

    classification = data["classification"]
    label = classification["label"]
    representation = data["representation"]

    # Image size relative to the page: ratio of the square roots of both areas.
    page_extent = math.sqrt(abs(page_width * page_height))
    image_extent = math.sqrt(abs(x2 - x1) * abs(y2 - y1))
    size_quotient = round(image_extent / page_extent, 4)

    too_small = bool(size_quotient < get_class_specific_min_image_to_page_quotient(label))
    too_large = bool(size_quotient > get_class_specific_max_image_to_page_quotient(label))

    too_tall = bool(width / height < get_class_specific_min_image_width_to_height_quotient(label))
    too_wide = bool(width / height > get_class_specific_max_image_width_to_height_quotient(label))

    top_probability = max(classification["probabilities"].values())
    unconfident = bool(top_probability < get_class_specific_min_classification_confidence(label))

    # page_idx is zero-based in the input; the response reports a one-based page number.
    position = {"x1": x1, "x2": x2, "y1": y1, "y2": y2, "pageNumber": data["page_idx"] + 1}

    image_size_filter = {
        "quotient": size_quotient,
        "tooLarge": too_large,
        "tooSmall": too_small,
    }
    image_format_filter = {
        "quotient": round(width / height, 4),
        "tooTall": too_tall,
        "tooWide": too_wide,
    }
    all_passed = not (too_large or too_small or too_tall or too_wide or unconfident)

    return {
        "classification": classification,
        "representation": representation,
        "position": position,
        "geometry": {"width": width, "height": height},
        "alpha": alpha,
        "filters": {
            "geometry": {
                "imageSize": image_size_filter,
                "imageFormat": image_format_filter,
            },
            "probability": {"unconfident": unconfident},
            "allPassed": all_passed,
        },
    }
|
|
|
|
|
|
@lru_cache(maxsize=None)
def parse_env_var(prefix, table=None):
    """Look up the variable named ``prefix`` and decode its JSON value.

    Args:
        prefix: Exact name of the variable to read.
        table: Mapping to read from instead of ``os.environ``. It has to be
            hashable, because calls are memoised with ``lru_cache``.

    Returns:
        The decoded value, or ``None`` when the variable is absent or its
        value cannot be parsed (the parsing error is logged as a warning).

    Note:
        Results are cached per ``(prefix, table)``. Changes made to the
        environment after the first lookup are not observed until
        ``parse_env_var.cache_clear()`` is called.
    """
    # Compare against None explicitly: ``table or os.environ`` would silently
    # swap an explicitly passed, empty table for the real process environment.
    source = os.environ if table is None else table

    if prefix not in source:
        return None

    try:
        return parse_env_var_value(source[prefix])
    except ParsingError as err:
        logger.warning(err)
        return None
|
|
|
|
|
|
def parse_env_var_value(env_var_value):
    """Decode the JSON text stored in an environment variable.

    Args:
        env_var_value: Raw value to decode with ``json.loads``.

    Returns:
        The decoded Python object.

    Raises:
        ParsingError: If decoding fails for any reason; the original
            exception is attached as the cause.
    """
    try:
        decoded = json.loads(env_var_value)
    except Exception as cause:
        raise ParsingError(f"Failed to parse {env_var_value}") from cause
    return decoded
|