replaced string keys for metadata fields with enum members
This commit is contained in:
parent
358d7ecd91
commit
7340fb6dda
@ -7,6 +7,7 @@ from PIL import Image
|
||||
from funcy import rcompose
|
||||
|
||||
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
|
||||
from image_prediction.info import Info
|
||||
|
||||
|
||||
class ParsablePDFImageExtractor(ImageExtractor):
|
||||
@ -21,15 +22,15 @@ class ParsablePDFImageExtractor(ImageExtractor):
|
||||
x1, y1, x2, y2 = map(rounder, image_info["bbox"])
|
||||
width, height = itemgetter("width", "height")(image_info)
|
||||
return {
|
||||
"page_width": page_width,
|
||||
"page_height": page_height,
|
||||
"page_idx": page.number,
|
||||
"width": width,
|
||||
"height": height,
|
||||
"x1": x1,
|
||||
"x2": x2,
|
||||
"y1": y1,
|
||||
"y2": y2,
|
||||
Info.PAGE_WIDTH: page_width,
|
||||
Info.PAGE_HEIGHT: page_height,
|
||||
Info.PAGE_IDX: page.number,
|
||||
Info.WIDTH: width,
|
||||
Info.HEIGHT: height,
|
||||
Info.X1: x1,
|
||||
Info.X2: x2,
|
||||
Info.Y1: y1,
|
||||
Info.Y2: y2,
|
||||
}
|
||||
|
||||
rounder = rcompose(round, int)
|
||||
|
||||
13
image_prediction/info.py
Normal file
13
image_prediction/info.py
Normal file
@ -0,0 +1,13 @@
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Info(Enum):
|
||||
PAGE_WIDTH = "page_width"
|
||||
PAGE_HEIGHT = "page_height"
|
||||
PAGE_IDX = "page_idx"
|
||||
WIDTH = "width"
|
||||
HEIGHT = "height"
|
||||
X1 = "x1"
|
||||
X2 = "x2"
|
||||
Y1 = "y1"
|
||||
Y2 = "y2"
|
||||
@ -5,4 +5,4 @@ class DatabaseConnector(abc.ABC):
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_object(self, identifier):
|
||||
pass
|
||||
raise NotImplementedError
|
||||
|
||||
@ -2,3 +2,4 @@
|
||||
norecursedirs = incl
|
||||
filterwarnings =
|
||||
ignore:.*imp.*:DeprecationWarning
|
||||
ignore:.*Use.*:DeprecationWarning
|
||||
|
||||
@ -18,6 +18,7 @@ from image_prediction.exceptions import UnknownEstimatorAdapter, UnknownImageExt
|
||||
from image_prediction.image_extractor.extractor import ImageMetadataPair
|
||||
from image_prediction.image_extractor.extractors.mock import ImageExtractorMock
|
||||
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
|
||||
from image_prediction.info import Info
|
||||
from image_prediction.model_loader.database.connectors.mock import DatabaseConnectorMock
|
||||
from image_prediction.model_loader.loader import ModelLoader
|
||||
from image_prediction.model_loader.loaders.mlflow import MlflowConnector
|
||||
@ -30,6 +31,8 @@ def image_extractor(extractor_type):
|
||||
return ImageExtractorMock()
|
||||
elif extractor_type == "parsable_pdf":
|
||||
return ParsablePDFImageExtractor()
|
||||
elif extractor_type == "default":
|
||||
return None
|
||||
else:
|
||||
raise UnknownImageExtractor(f"No image extractor for type {extractor_type} was specified.")
|
||||
|
||||
@ -149,15 +152,15 @@ def metadata(images):
|
||||
y1 = random.randint(0, page_height - height)
|
||||
y2 = y1 + height
|
||||
metadata = {
|
||||
"page_width": page_width,
|
||||
"page_height": page_height,
|
||||
"page_idx": current_page_idx(),
|
||||
"width": width,
|
||||
"height": height,
|
||||
"x1": x1,
|
||||
"x2": x2,
|
||||
"y1": y1,
|
||||
"y2": y2
|
||||
Info.PAGE_WIDTH: page_width,
|
||||
Info.PAGE_HEIGHT: page_height,
|
||||
Info.PAGE_IDX: current_page_idx(),
|
||||
Info.WIDTH: width,
|
||||
Info.HEIGHT: height,
|
||||
Info.X1: x1,
|
||||
Info.X2: x2,
|
||||
Info.Y1: y1,
|
||||
Info.Y2: y2,
|
||||
}
|
||||
return metadata
|
||||
|
||||
@ -180,7 +183,7 @@ def pdf(image_metadata_pairs):
|
||||
|
||||
|
||||
def add_image(pdf, image_metadata_pair):
|
||||
while fewer_pages_then_required(image_metadata_pair.metadata["page_idx"], pdf):
|
||||
while fewer_pages_then_required(image_metadata_pair.metadata[Info.PAGE_IDX], pdf):
|
||||
pdf.add_page()
|
||||
|
||||
add_image_to_last_page(pdf, image_metadata_pair)
|
||||
@ -196,7 +199,7 @@ def pdf_stream(pdf: fpdf.fpdf.FPDF):
|
||||
|
||||
def add_image_to_last_page(pdf: fpdf.fpdf.FPDF, image_metadata_pair):
|
||||
image, metadata = image_metadata_pair
|
||||
x, y, w, h = itemgetter("x1", "y1", "width", "height")(metadata)
|
||||
x, y, w, h = itemgetter(Info.X1, Info.Y1, Info.WIDTH, Info.HEIGHT)(metadata)
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".png") as temp_image:
|
||||
image.save(temp_image.name)
|
||||
|
||||
@ -3,6 +3,7 @@ import pytest
|
||||
|
||||
from image_prediction.estimator.preprocessor.utils import images_to_batch_tensor
|
||||
from image_prediction.extraction import extract_images_from_pdf
|
||||
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
|
||||
|
||||
|
||||
@pytest.mark.parametrize("extractor_type", ["mock"])
|
||||
@ -12,7 +13,7 @@ def test_image_extractor_mock(image_extractor, images):
|
||||
assert images_extracted == images
|
||||
|
||||
|
||||
@pytest.mark.parametrize("extractor_type", ["parsable_pdf"])
|
||||
@pytest.mark.parametrize("extractor_type", ["parsable_pdf", "default"])
|
||||
@pytest.mark.parametrize("batch_size", [0, 1, 2, 64])
|
||||
@pytest.mark.parametrize("input_size", [{"depth": 3, "width": 170, "height": 220}], indirect=["input_size"])
|
||||
def test_parsable_pdf_image_extractor(image_extractor, pdf, images, metadata, input_size):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user