Merge branch 'master' of ssh://git.iqser.com:2222/rr/image-prediction into RED-6189-bugfix
This commit is contained in:
commit
79455f0dd6
@ -1,6 +1,6 @@
|
||||
webserver:
|
||||
host: $SERVER_HOST|"127.0.0.1" # webserver address
|
||||
port: $SERVER_PORT|5000 # webserver port
|
||||
host: $SERVER_HOST|"127.0.0.1" # Webserver address
|
||||
port: $SERVER_PORT|5000 # Webserver port
|
||||
|
||||
service:
|
||||
logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
|
||||
|
||||
@ -36,3 +36,7 @@ class InvalidBox(Exception):
|
||||
|
||||
class ParsingError(Exception):
    """Exception type for parsing failures (raise sites are outside this excerpt)."""
|
||||
|
||||
|
||||
class BadXref(ValueError):
    """Error type for a bad xref.

    Derives from ValueError, so handlers that catch ValueError also catch it.
    NOTE(review): presumably raised for unusable PDF image xrefs - confirm at the raise site.
    """
|
||||
|
||||
@ -13,7 +13,7 @@ from PIL import Image
|
||||
from funcy import merge, pluck, compose, rcompose, remove, keep
|
||||
|
||||
from image_prediction.config import CONFIG
|
||||
from image_prediction.exceptions import InvalidBox
|
||||
from image_prediction.exceptions import InvalidBox, BadXref
|
||||
from image_prediction.formatter.formatters.enum import EnumFormatter
|
||||
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
|
||||
from image_prediction.info import Info
|
||||
|
||||
@ -3,15 +3,14 @@
|
||||
from pathlib import Path
|
||||
|
||||
# Directory that contains this module file (symlinks resolved).
MODULE_DIR = Path(__file__).resolve().parents[0]
# Directory one level above the module directory.
PACKAGE_ROOT_DIR = MODULE_DIR.parents[0]

# Files located directly in the package root.
CONFIG_FILE = PACKAGE_ROOT_DIR.joinpath("config.yaml")
BANNER_FILE = PACKAGE_ROOT_DIR.joinpath("banner.txt")

# Data directory; MLRUNS_DIR is kept as a plain string rather than a Path.
DATA_DIR = PACKAGE_ROOT_DIR.joinpath("data")
MLRUNS_DIR = str(DATA_DIR.joinpath("mlruns"))

# Test tree: the data directory and the DVC file that tracks it.
TEST_DIR = PACKAGE_ROOT_DIR.joinpath("test")
TEST_DATA_DIR = TEST_DIR.joinpath("data")
TEST_DATA_DIR_DVC = TEST_DIR.joinpath("data.dvc")
|
||||
|
||||
@ -1,6 +1,15 @@
|
||||
from functools import wraps
|
||||
from inspect import signature
|
||||
from itertools import starmap
|
||||
from typing import Callable
|
||||
|
||||
from funcy import iterate, first, curry, map
|
||||
from pymonad.either import Left, Right, Either
|
||||
from pymonad.tools import curry as pmcurry
|
||||
|
||||
from image_prediction.utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def until(cond, func, *args, **kwargs):
|
||||
@ -13,3 +22,63 @@ def lift(fn):
|
||||
|
||||
def starlift(fn):
    """Apply the curried form of `itertools.starmap` to `fn` and return the result.

    NOTE(review): presumably yields a callable that star-maps `fn` over an
    iterable of argument tuples - confirm against funcy's `curry` semantics.
    """
    curried_starmap = curry(starmap)
    return curried_starmap(fn)
|
||||
|
||||
|
||||
def bottom(*args, **kwargs):
    """Constant predicate: accept any arguments, ignore them, and return False."""
    return False
|
||||
|
||||
|
||||
def top(*args, **kwargs):
    """Constant predicate: accept any arguments, ignore them, and return True."""
    return True
|
||||
|
||||
|
||||
def left(fn):
    """Return a one-argument version of `fn` whose result is boxed in `Left`.

    The returned function carries `fn`'s metadata via `functools.wraps`.
    """

    def inner(x):
        value = fn(x)
        return Left(value)

    return wraps(fn)(inner)
|
||||
|
||||
|
||||
def right(fn):
    """Return a one-argument version of `fn` whose result is boxed in `Right`.

    The returned function carries `fn`'s metadata via `functools.wraps`.
    """

    def inner(x):
        value = fn(x)
        return Right(value)

    return wraps(fn)(inner)
|
||||
|
||||
|
||||
def wrap_left(fn, success_condition=top, error_message=None) -> Callable:
    """Decorate `fn` via `wrap_either` with `Left` as the success type and `Right` as the failure type.

    `success_condition` and `error_message` are forwarded to `wrap_either` unchanged.
    """
    decorate = wrap_either(Left, Right, success_condition=success_condition, error_message=error_message)
    return decorate(fn)
|
||||
|
||||
|
||||
def wrap_right(fn, success_condition=top, error_message=None) -> Callable:
    """Decorate `fn` via `wrap_either` with `Right` as the success type and `Left` as the failure type.

    `success_condition` and `error_message` are forwarded to `wrap_either` unchanged.
    """
    decorate = wrap_either(Right, Left, success_condition=success_condition, error_message=error_message)
    return decorate(fn)
|
||||
|
||||
|
||||
def wrap_either(success_type, failure_type, success_condition=top, error_message=None) -> Callable:
    """Build a decorator that boxes a function's outcome in one of two container types.

    Args:
        success_type: Called with the result when `success_condition(result)` is truthy.
        failure_type: Called with a dict `{"error": ..., "result": ...}` otherwise.
        success_condition: Predicate applied to the result; defaults to `top` (always True).
        error_message: Value stored under "error". When it is falsy and the wrapped
            function raised, the caught exception is stored instead.

    Returns:
        A decorator. The decorated function is passed through pymonad's `curry` with
        the parameter count taken from `fn`'s signature. Any `Exception` raised while
        calling `fn`, checking the condition or building the success value is logged
        and converted into a `failure_type` value whose "result" is `Void`.
    """

    # Not decorated with `wraps(wrap_either)`: that would copy the factory's name,
    # docstring and `__wrapped__` onto a function with a different signature and
    # make `inspect.signature` report the factory's parameters.
    def decorator(fn) -> Callable:
        n_params = len(signature(fn).parameters)

        @pmcurry(n_params)
        @wraps(fn)
        def wrapped(*args, **kwargs) -> Either:
            try:
                result = fn(*args, **kwargs)
                if success_condition(result):
                    return success_type(result)
                return failure_type({"error": error_message, "result": result})
            except Exception as err:
                logger.error(err)
                return failure_type({"error": error_message or err, "result": Void})

        return wrapped

    return decorator
|
||||
|
||||
|
||||
class Void:
    """Empty marker class; `wrap_either` stores it as the "result" when the wrapped call raised."""
|
||||
|
||||
@ -23,3 +23,5 @@ pdf2image==1.16.0
|
||||
frozendict==2.3.0
|
||||
protobuf<=3.20.*
|
||||
prometheus-client==0.13.1
|
||||
fsspec==2022.11.0
|
||||
PyMonad==2.4.0
|
||||
|
||||
1
test/.gitignore
vendored
Normal file
1
test/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/data
|
||||
5
test/data.dvc
Normal file
5
test/data.dvc
Normal file
@ -0,0 +1,5 @@
|
||||
outs:
|
||||
- md5: 4b0fec291ce0661b3efbbd8b80f4f514.dir
|
||||
size: 107332
|
||||
nfiles: 4
|
||||
path: data
|
||||
Binary file not shown.
@ -1,44 +0,0 @@
|
||||
[
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "FFFEF0C7033648170F3EFFFFF",
|
||||
"position": {
|
||||
"x1": 321,
|
||||
"x2": 515,
|
||||
"y1": 348,
|
||||
"y2": 542,
|
||||
"pageNumber": 2
|
||||
},
|
||||
"geometry": {
|
||||
"width": 194,
|
||||
"height": 194
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.2741,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.0,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -1,92 +0,0 @@
|
||||
{
|
||||
"input": [
|
||||
{
|
||||
"width": 100,
|
||||
"height": 8,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 0,
|
||||
"y1": 0,
|
||||
"x2": 100,
|
||||
"y2": 8
|
||||
},
|
||||
{
|
||||
"width": 100,
|
||||
"height": 9,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 0,
|
||||
"y1": 9,
|
||||
"x2": 100,
|
||||
"y2": 18
|
||||
},
|
||||
{
|
||||
"width": 100,
|
||||
"height": 35,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 0,
|
||||
"y1": 18,
|
||||
"x2": 100,
|
||||
"y2": 53
|
||||
},
|
||||
{
|
||||
"width": 47,
|
||||
"height": 46,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 0,
|
||||
"y1": 54,
|
||||
"x2": 47,
|
||||
"y2": 100
|
||||
},
|
||||
{
|
||||
"width": 31,
|
||||
"height": 46,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 48,
|
||||
"y1": 54,
|
||||
"x2": 79,
|
||||
"y2": 100
|
||||
},
|
||||
{
|
||||
"width": 20,
|
||||
"height": 19,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 80,
|
||||
"y1": 54,
|
||||
"x2": 100,
|
||||
"y2": 73
|
||||
},
|
||||
{
|
||||
"width": 20,
|
||||
"height": 27,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 80,
|
||||
"y1": 73,
|
||||
"x2": 100,
|
||||
"y2": 100
|
||||
}
|
||||
],
|
||||
"target": {
|
||||
"width": 100,
|
||||
"height": 100,
|
||||
"page_idx": 0,
|
||||
"page_width": 100,
|
||||
"page_height": 100,
|
||||
"x1": 0,
|
||||
"y1": 0,
|
||||
"x2": 100,
|
||||
"y2": 100
|
||||
}
|
||||
}
|
||||
14
test/fixtures/input.py
vendored
14
test/fixtures/input.py
vendored
@ -1,7 +1,21 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from dvc.repo import Repo
|
||||
|
||||
from image_prediction.locations import PACKAGE_ROOT_DIR, TEST_DATA_DIR_DVC
|
||||
from image_prediction.utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
@pytest.fixture
def input_batch(batch_size, input_size):
    """Random array of shape `(batch_size, *input_size)` drawn with `np.random.random_sample`."""
    batch_shape = (batch_size, *input_size)
    return np.random.random_sample(size=batch_shape)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def dvc_test_data():
    """Session-scoped fixture that pulls the DVC target `TEST_DATA_DIR_DVC` into the repo at `PACKAGE_ROOT_DIR`."""
    logger.info("Pulling data with DVC...")
    # noinspection PyCallingNonCallable
    repo = Repo(PACKAGE_ROOT_DIR)
    repo.pull(targets=[str(TEST_DATA_DIR_DVC)])
    logger.info("Finished pulling data.")
|
||||
|
||||
12
test/fixtures/pdf.py
vendored
12
test/fixtures/pdf.py
vendored
@ -4,7 +4,7 @@ import fpdf
|
||||
import pytest
|
||||
|
||||
from image_prediction.locations import TEST_DATA_DIR
|
||||
from test.utils.generation.pdf import add_image, pdf_stream
|
||||
from test.utils.generation.pdf import add_image, pdf_stream, stream_pdf_bytes
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -18,6 +18,10 @@ def pdf(image_metadata_pairs):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def real_pdf():
|
||||
with open(os.path.join(TEST_DATA_DIR, "f2dc689ca794fccb8cd38b95f2bf6ba9.pdf"), "rb") as f:
|
||||
yield f.read()
|
||||
def real_pdf(dvc_test_data):
|
||||
yield from stream_pdf_bytes(TEST_DATA_DIR / "f2dc689ca794fccb8cd38b95f2bf6ba9.pdf")
|
||||
|
||||
|
||||
@pytest.fixture
def bad_xref_pdf(dvc_test_data):
    """Yield the raw bytes of `bad_xref.pdf` from the test data directory.

    Requests `dvc_test_data` so that fixture runs before the file is read.
    """
    pdf_path = TEST_DATA_DIR / "bad_xref.pdf"
    yield from stream_pdf_bytes(pdf_path)
|
||||
|
||||
2
test/fixtures/target.py
vendored
2
test/fixtures/target.py
vendored
@ -87,7 +87,7 @@ def expected_predictions_mapped_and_formatted(expected_predictions_mapped):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def real_expected_service_response():
|
||||
def real_expected_service_response(dvc_test_data):
|
||||
with open(os.path.join(TEST_DATA_DIR, "f2dc689ca794fccb8cd38b95f2bf6ba9_predictions.json"), "r") as f:
|
||||
yield json.load(f)
|
||||
|
||||
|
||||
@ -9,7 +9,13 @@ from funcy import first, rest
|
||||
|
||||
from image_prediction.extraction import extract_images_from_pdf
|
||||
from image_prediction.image_extractor.extractor import ImageMetadataPair
|
||||
from image_prediction.image_extractor.extractors.parsable import extract_pages, has_alpha_channel, get_image_infos
|
||||
from image_prediction.image_extractor.extractors.parsable import (
|
||||
extract_pages,
|
||||
has_alpha_channel,
|
||||
get_image_infos,
|
||||
extract_valid_metadata,
|
||||
xref_to_image,
|
||||
)
|
||||
from image_prediction.info import Info
|
||||
from test.utils.comparison import metadata_equal, image_sets_equal
|
||||
from test.utils.generation.pdf import add_image, pdf_stream
|
||||
@ -75,3 +81,12 @@ def test_has_alpha_channel(base_patch_metadata, suffix, mode):
|
||||
assert not list(rest(xrefs))
|
||||
|
||||
doc.close()
|
||||
|
||||
|
||||
def test_bad_xref_handling(bad_xref_pdf, dvc_test_data):
    """`xref_to_image` must return a falsy value for the xref of the first valid metadata entry on the first page."""
    document = fitz.Document(stream=bad_xref_pdf)
    first_page = first(document)
    first_entry = first(extract_valid_metadata(document, first_page))
    bad_xref = first_entry[Info.XREF]

    assert not xref_to_image(document, bad_xref)
|
||||
|
||||
@ -60,10 +60,10 @@ def test_image_stitcher(patch_image_metadata_pairs, base_patch_metadata, base_pa
|
||||
assert images_equal(pair_stitched.image.resize((10, 10)), base_patch_image.resize((10, 10)), atol=0.4)
|
||||
|
||||
|
||||
def test_image_stitcher_with_gaps_must_succeed():
|
||||
def test_image_stitcher_with_gaps_must_succeed(dvc_test_data):
|
||||
from image_prediction.locations import TEST_DATA_DIR
|
||||
|
||||
with open(os.path.join(TEST_DATA_DIR, "stitching_with_tolerance.json")) as f:
|
||||
with open(TEST_DATA_DIR / "stitching_with_tolerance.json") as f:
|
||||
patches_metadata, base_patch_metadata = itemgetter("input", "target")(ReverseEnumFormatter(Info)(json.load(f)))
|
||||
|
||||
images = map(gray_image_from_metadata, patches_metadata)
|
||||
|
||||
@ -28,3 +28,8 @@ def add_image_to_last_page(pdf: fpdf.fpdf.FPDF, image_metadata_pair, suffix):
|
||||
with tempfile.NamedTemporaryFile(suffix=f".{suffix}") as temp_image:
|
||||
image.save(temp_image.name)
|
||||
pdf.image(temp_image.name, x=x, y=y, w=w, h=h, type=suffix)
|
||||
|
||||
|
||||
def stream_pdf_bytes(path: str):
    """Generator that yields the entire content of the file at `path` as one bytes object.

    The file stays open until the generator is resumed or closed after the yield.
    """
    with open(path, mode="rb") as pdf_file:
        payload = pdf_file.read()
        yield payload
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user