Merge branch 'release/1.2.x' of ssh://git.iqser.com:2222/rr/image-prediction into RED-5107-robustify-image-service

This commit is contained in:
Julius Unverfehrt 2022-08-30 08:10:22 +02:00
commit 6c54cea57d
2 changed files with 16 additions and 1 deletion

View File

@@ -50,6 +50,20 @@ class ParsablePDFImageExtractor(ImageExtractor):
yield from image_metadata_pairs yield from image_metadata_pairs
# def __preprocess(self, image_metadata_pair):
# image, metadata = image_metadata_pair
#
# try:
# image = self.__resize_and_convert(image)
# image_metadata_pair = ImageMetadataPair(image, metadata)
# except Exception as err:
# logger.warn(
# f"{err}: couldn't preprocess image [ page_idx: {metadata[Info.PAGE_IDX]}, x1: {metadata[Info.X1]}, y1: {metadata[Info.Y1]}, width: {metadata[Info.WIDTH]}, height: {metadata[Info.HEIGHT]} ]"
# )
# image_metadata_pair = None
#
# return image_metadata_pair
def extract_pages(doc, page_range): def extract_pages(doc, page_range):
page_range = range(page_range.start + 1, page_range.stop + 1) page_range = range(page_range.start + 1, page_range.stop + 1)

View File

@@ -5,7 +5,7 @@ import fitz
import fpdf import fpdf
import pytest import pytest
from PIL import Image from PIL import Image
from funcy import first, rest from funcy import first, rest, lmap
from image_prediction.extraction import extract_images_from_pdf from image_prediction.extraction import extract_images_from_pdf
from image_prediction.image_extractor.extractor import ImageMetadataPair from image_prediction.image_extractor.extractor import ImageMetadataPair
@@ -27,6 +27,7 @@ def test_image_extractor_mock(image_extractor, images):
@pytest.mark.parametrize("alpha", [False, True]) @pytest.mark.parametrize("alpha", [False, True])
def test_parsable_pdf_image_extractor(image_extractor, pdf, images, metadata, input_size, alpha): def test_parsable_pdf_image_extractor(image_extractor, pdf, images, metadata, input_size, alpha):
images_extracted, metadata_extracted = map(list, extract_images_from_pdf(pdf, image_extractor)) images_extracted, metadata_extracted = map(list, extract_images_from_pdf(pdf, image_extractor))
if not alpha: if not alpha:
assert image_sets_equal(images_extracted, images) assert image_sets_equal(images_extracted, images)
assert metadata_equal(metadata_extracted, metadata) assert metadata_equal(metadata_extracted, metadata)