Merge branch 'master' of ssh://git.iqser.com:2222/rr/image-prediction into RED-6189-bugfix

This commit is contained in:
Julius Unverfehrt 2023-02-13 17:23:07 +01:00
commit 79455f0dd6
17 changed files with 133 additions and 151 deletions

View File

@ -1,6 +1,6 @@
webserver:
host: $SERVER_HOST|"127.0.0.1" # webserver address
port: $SERVER_PORT|5000 # webserver port
host: $SERVER_HOST|"127.0.0.1" # Webserver address
port: $SERVER_PORT|5000 # Webserver port
service:
logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger

View File

@ -36,3 +36,7 @@ class InvalidBox(Exception):
class ParsingError(Exception):
    """Exception type for parsing errors; adds nothing on top of ``Exception``."""
class BadXref(ValueError):
    """``ValueError`` subclass for a bad xref.

    NOTE(review): presumably raised when a PDF cross-reference cannot be
    resolved to an image -- confirm against the image extractor.
    """

View File

@ -13,7 +13,7 @@ from PIL import Image
from funcy import merge, pluck, compose, rcompose, remove, keep
from image_prediction.config import CONFIG
from image_prediction.exceptions import InvalidBox
from image_prediction.exceptions import InvalidBox, BadXref
from image_prediction.formatter.formatters.enum import EnumFormatter
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
from image_prediction.info import Info

View File

@ -3,15 +3,14 @@
from pathlib import Path
# Directory that contains this module file (symlinks resolved).
MODULE_DIR = Path(__file__).resolve().parents[0]
# Parent directory of MODULE_DIR; every path below is derived from it.
PACKAGE_ROOT_DIR = MODULE_DIR.parents[0]
# Files located directly in the package root.
CONFIG_FILE = PACKAGE_ROOT_DIR / "config.yaml"
BANNER_FILE = PACKAGE_ROOT_DIR / "banner.txt"
DATA_DIR = PACKAGE_ROOT_DIR / "data"
# Kept as a plain string rather than a Path.
MLRUNS_DIR = str(DATA_DIR / "mlruns")
# NOTE(review): this name is bound again two lines below to the same
# location (via TEST_DIR); this first binding looks redundant.
TEST_DATA_DIR = PACKAGE_ROOT_DIR / "test" / "data"
TEST_DIR = PACKAGE_ROOT_DIR / "test"
TEST_DATA_DIR = TEST_DIR / "data"
# Path of the "data.dvc" file next to the test data directory.
TEST_DATA_DIR_DVC = TEST_DIR / "data.dvc"

View File

@ -1,6 +1,15 @@
from functools import wraps
from inspect import signature
from itertools import starmap
from typing import Callable
from funcy import iterate, first, curry, map
from pymonad.either import Left, Right, Either
from pymonad.tools import curry as pmcurry
from image_prediction.utils import get_logger
logger = get_logger()
def until(cond, func, *args, **kwargs):
@ -13,3 +22,63 @@ def lift(fn):
def starlift(fn):
    """Apply the curried form of ``itertools.starmap`` to ``fn``.

    NOTE(review): presumably the result is a callable still waiting for the
    iterable of argument tuples -- confirm how ``curry`` determines the arity
    of ``starmap``.
    """
    curried_starmap = curry(starmap)
    return curried_starmap(fn)
def bottom(*args, **kwargs):
    """Constant predicate: accept any arguments, ignore them, return ``False``."""
    return False
def top(*args, **kwargs):
    """Constant predicate: accept any arguments, ignore them, return ``True``."""
    return True
def left(fn):
    """Return a one-argument wrapper that puts ``fn``'s result into a ``Left``.

    The wrapper carries ``fn``'s metadata (name, docstring, ...) via
    ``functools.wraps``.
    """

    def lifted(x):
        value = fn(x)
        return Left(value)

    return wraps(fn)(lifted)
def right(fn):
    """Return a one-argument wrapper that puts ``fn``'s result into a ``Right``.

    The wrapper carries ``fn``'s metadata (name, docstring, ...) via
    ``functools.wraps``.
    """

    def lifted(x):
        value = fn(x)
        return Right(value)

    return wraps(fn)(lifted)
def wrap_left(fn, success_condition=top, error_message=None) -> Callable:
    """Wrap ``fn`` so that an accepted result is returned as a ``Left``.

    Delegates to ``wrap_either`` with ``Left`` as the success type and
    ``Right`` as the failure type.

    :param fn: function to wrap.
    :param success_condition: predicate applied to ``fn``'s result; defaults
        to ``top``, which accepts everything.
    :param error_message: message stored in the failure payload.
    :return: the wrapped function produced by ``wrap_either``.
    """
    decorate = wrap_either(Left, Right, success_condition=success_condition, error_message=error_message)
    return decorate(fn)
def wrap_right(fn, success_condition=top, error_message=None) -> Callable:
    """Wrap ``fn`` so that an accepted result is returned as a ``Right``.

    Delegates to ``wrap_either`` with ``Right`` as the success type and
    ``Left`` as the failure type.

    :param fn: function to wrap.
    :param success_condition: predicate applied to ``fn``'s result; defaults
        to ``top``, which accepts everything.
    :param error_message: message stored in the failure payload.
    :return: the wrapped function produced by ``wrap_either``.
    """
    decorate = wrap_either(Right, Left, success_condition=success_condition, error_message=error_message)
    return decorate(fn)
def wrap_either(success_type, failure_type, success_condition=top, error_message=None) -> Callable:
    """Build a decorator that turns a function's outcome into an Either value.

    The decorated function is curried over the number of parameters in its
    signature. When called:

    * if ``success_condition(result)`` is truthy, ``success_type(result)`` is
      returned;
    * otherwise ``failure_type({"error": error_message, "result": result})``
      is returned;
    * if anything inside the guarded section raises an ``Exception``, the
      error is logged and
      ``failure_type({"error": error_message or err, "result": Void})`` is
      returned instead of propagating.

    :param success_type: constructor used for accepted results.
    :param failure_type: constructor used for rejected results and errors.
    :param success_condition: predicate deciding whether a result is accepted.
    :param error_message: message placed in the failure payload; on an
        exception the exception itself is used when no message is given.
    :return: a decorator taking the function to wrap.
    """

    # NOTE(review): this copies wrap_either's own metadata onto the decorator;
    # looks unusual but is kept as-is -- confirm it is intended.
    @wraps(wrap_either)
    def decorator(fn) -> Callable:
        arity = len(signature(fn).parameters)

        @pmcurry(arity)
        @wraps(fn)
        def guarded(*args, **kwargs) -> Either:
            # The predicate and the Either constructors run inside the try as
            # well, so their failures are also converted into a failure value.
            try:
                outcome = fn(*args, **kwargs)
                if not success_condition(outcome):
                    return failure_type({"error": error_message, "result": outcome})
                return success_type(outcome)
            except Exception as err:
                logger.error(err)
                return failure_type({"error": error_message or err, "result": Void})

        return guarded

    return decorator
class Void:
    """Placeholder class with no members; used as a "no result" marker."""

View File

@ -23,3 +23,5 @@ pdf2image==1.16.0
frozendict==2.3.0
protobuf<=3.20.*
prometheus-client==0.13.1
fsspec==2022.11.0
PyMonad==2.4.0

1
test/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/data

5
test/data.dvc Normal file
View File

@ -0,0 +1,5 @@
outs:
- md5: 4b0fec291ce0661b3efbbd8b80f4f514.dir
size: 107332
nfiles: 4
path: data

View File

@ -1,44 +0,0 @@
[
{
"classification": {
"label": "formula",
"probabilities": {
"formula": 1.0,
"logo": 0.0,
"other": 0.0,
"signature": 0.0
}
},
"representation": "FFFEF0C7033648170F3EFFFFF",
"position": {
"x1": 321,
"x2": 515,
"y1": 348,
"y2": 542,
"pageNumber": 2
},
"geometry": {
"width": 194,
"height": 194
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.2741,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 1.0,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
}
]

View File

@ -1,92 +0,0 @@
{
"input": [
{
"width": 100,
"height": 8,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 0,
"y1": 0,
"x2": 100,
"y2": 8
},
{
"width": 100,
"height": 9,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 0,
"y1": 9,
"x2": 100,
"y2": 18
},
{
"width": 100,
"height": 35,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 0,
"y1": 18,
"x2": 100,
"y2": 53
},
{
"width": 47,
"height": 46,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 0,
"y1": 54,
"x2": 47,
"y2": 100
},
{
"width": 31,
"height": 46,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 48,
"y1": 54,
"x2": 79,
"y2": 100
},
{
"width": 20,
"height": 19,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 80,
"y1": 54,
"x2": 100,
"y2": 73
},
{
"width": 20,
"height": 27,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 80,
"y1": 73,
"x2": 100,
"y2": 100
}
],
"target": {
"width": 100,
"height": 100,
"page_idx": 0,
"page_width": 100,
"page_height": 100,
"x1": 0,
"y1": 0,
"x2": 100,
"y2": 100
}
}

View File

@ -1,7 +1,21 @@
import numpy as np
import pytest
from dvc.repo import Repo
from image_prediction.locations import PACKAGE_ROOT_DIR, TEST_DATA_DIR_DVC
from image_prediction.utils import get_logger
logger = get_logger()
@pytest.fixture
def input_batch(batch_size, input_size):
    """Random float array of shape ``(batch_size, *input_size)``.

    Values come from ``np.random.random_sample``.
    """
    shape = (batch_size, *input_size)
    return np.random.random_sample(size=shape)
@pytest.fixture(scope="session")
def dvc_test_data():
    """Session-scoped fixture that pulls the DVC target ``TEST_DATA_DIR_DVC``.

    Returns nothing; tests request it only for its side effect.
    """
    logger.info("Pulling data with DVC...")
    repo = Repo(PACKAGE_ROOT_DIR)
    dvc_targets = [str(TEST_DATA_DIR_DVC)]
    # noinspection PyCallingNonCallable
    repo.pull(targets=dvc_targets)
    logger.info("Finished pulling data.")

12
test/fixtures/pdf.py vendored
View File

@ -4,7 +4,7 @@ import fpdf
import pytest
from image_prediction.locations import TEST_DATA_DIR
from test.utils.generation.pdf import add_image, pdf_stream
from test.utils.generation.pdf import add_image, pdf_stream, stream_pdf_bytes
@pytest.fixture
@ -18,6 +18,10 @@ def pdf(image_metadata_pairs):
@pytest.fixture
def real_pdf():
with open(os.path.join(TEST_DATA_DIR, "f2dc689ca794fccb8cd38b95f2bf6ba9.pdf"), "rb") as f:
yield f.read()
def real_pdf(dvc_test_data):
yield from stream_pdf_bytes(TEST_DATA_DIR / "f2dc689ca794fccb8cd38b95f2bf6ba9.pdf")
@pytest.fixture
def bad_xref_pdf(dvc_test_data):
    """Yield the raw bytes of ``bad_xref.pdf`` from the test data directory.

    Requests ``dvc_test_data`` so that fixture runs first.
    """
    pdf_path = TEST_DATA_DIR / "bad_xref.pdf"
    yield from stream_pdf_bytes(pdf_path)

View File

@ -87,7 +87,7 @@ def expected_predictions_mapped_and_formatted(expected_predictions_mapped):
@pytest.fixture
def real_expected_service_response():
def real_expected_service_response(dvc_test_data):
with open(os.path.join(TEST_DATA_DIR, "f2dc689ca794fccb8cd38b95f2bf6ba9_predictions.json"), "r") as f:
yield json.load(f)

View File

@ -9,7 +9,13 @@ from funcy import first, rest
from image_prediction.extraction import extract_images_from_pdf
from image_prediction.image_extractor.extractor import ImageMetadataPair
from image_prediction.image_extractor.extractors.parsable import extract_pages, has_alpha_channel, get_image_infos
from image_prediction.image_extractor.extractors.parsable import (
extract_pages,
has_alpha_channel,
get_image_infos,
extract_valid_metadata,
xref_to_image,
)
from image_prediction.info import Info
from test.utils.comparison import metadata_equal, image_sets_equal
from test.utils.generation.pdf import add_image, pdf_stream
@ -75,3 +81,12 @@ def test_has_alpha_channel(base_patch_metadata, suffix, mode):
assert not list(rest(xrefs))
doc.close()
def test_bad_xref_handling(bad_xref_pdf, dvc_test_data):
    """Check that ``xref_to_image`` gives a falsy result for the bad-xref PDF.

    Takes the xref of the first metadata entry on the first page of the
    document and asserts that converting it to an image yields nothing truthy.
    """
    doc = fitz.Document(stream=bad_xref_pdf)
    try:
        metadata = extract_valid_metadata(doc, first(doc))
        xref = first(metadata)[Info.XREF]
        assert not xref_to_image(doc, xref)
    finally:
        # Close the document even when the assertion fails, as the other
        # tests in this module do.
        doc.close()

View File

@ -60,10 +60,10 @@ def test_image_stitcher(patch_image_metadata_pairs, base_patch_metadata, base_pa
assert images_equal(pair_stitched.image.resize((10, 10)), base_patch_image.resize((10, 10)), atol=0.4)
def test_image_stitcher_with_gaps_must_succeed():
def test_image_stitcher_with_gaps_must_succeed(dvc_test_data):
from image_prediction.locations import TEST_DATA_DIR
with open(os.path.join(TEST_DATA_DIR, "stitching_with_tolerance.json")) as f:
with open(TEST_DATA_DIR / "stitching_with_tolerance.json") as f:
patches_metadata, base_patch_metadata = itemgetter("input", "target")(ReverseEnumFormatter(Info)(json.load(f)))
images = map(gray_image_from_metadata, patches_metadata)

View File

@ -28,3 +28,8 @@ def add_image_to_last_page(pdf: fpdf.fpdf.FPDF, image_metadata_pair, suffix):
with tempfile.NamedTemporaryFile(suffix=f".{suffix}") as temp_image:
image.save(temp_image.name)
pdf.image(temp_image.name, x=x, y=y, w=w, h=h, type=suffix)
def stream_pdf_bytes(path: str):
    """Yield the full content of the file at ``path`` as a single ``bytes`` object.

    The generator produces exactly one item. ``path`` is passed straight to
    ``open``, so path-like objects work as well as strings.

    :param path: location of the file to read.
    :return: generator yielding the file content once.
    """
    with open(path, "rb") as f:
        content = f.read()
    # Yield only after the file is closed so that no handle stays open while
    # the consumer (e.g. a pytest fixture) is suspended on the yield.
    yield content