coordinate transformers version 1 completed

This commit is contained in:
Matthias Bisping 2022-04-03 03:51:31 +02:00
parent f6a7a14a20
commit bf85ef357c
5 changed files with 84 additions and 40 deletions

View File

@ -1,9 +1,7 @@
import abc
from image_prediction.transformer.transformer import Transformer
class PDFNetCoordinateTransformer(Transformer):
class CoordinateTransformer:
@abc.abstractmethod
def forward(self, metadata):
@ -12,4 +10,3 @@ class PDFNetCoordinateTransformer(Transformer):
@abc.abstractmethod
def backward(self, metadata):
raise NotImplementedError

View File

@ -1,8 +1,11 @@
from image_prediction.transformer.transformer import Transformer
from image_prediction.transformer.transformers.coordinate.coordinate_transformer import CoordinateTransformer
class FitzCoordinateTransformer(Transformer):
class FitzCoordinateTransformer(CoordinateTransformer):
def transform(self, metadata: dict):
def forward(self, metadata: dict):
"""Fitz uses top left corner as origin; we take this as the reference coordinate system."""
return metadata
def backward(self, metadata: dict):
return self.forward(metadata)

View File

@ -1,8 +1,11 @@
from image_prediction.transformer.transformer import Transformer
from image_prediction.transformer.transformers.coordinate.coordinate_transformer import CoordinateTransformer
class FPDFCoordinateTransformer(Transformer):
class FPDFCoordinateTransformer(CoordinateTransformer):
def transform(self, metadata: dict):
def forward(self, metadata: dict):
"""FPDF uses top left corner as origin; we take this as the reference coordinate system."""
return metadata
def backward(self, metadata: dict):
return self.forward(metadata)

View File

@ -3,16 +3,17 @@ from operator import itemgetter
from funcy import omit
from image_prediction.info import Info
from image_prediction.transformer.transformer import Transformer
from image_prediction.transformer.transformers.coordinate.coordinate_transformer import CoordinateTransformer
class PDFNetCoordinateTransformer(Transformer):
class PDFNetCoordinateTransformer(CoordinateTransformer):
def transform(self, metadata: dict):
def forward(self, metadata: dict):
"""PDFNet coordinate system origin is in the bottom left corner."""
y1, y2, page_height = itemgetter(Info.Y1, Info.Y2, Info.PAGE_HEIGHT)(metadata)
y1_t = page_height - y2
y2_t = page_height - y1
return {**omit(metadata, [Info.Y1, Info.Y2]), **{Info.Y1: y1_t, Info.Y2: y2_t}}
def backward(self, metadata: dict):
return self.forward(metadata)

View File

@ -1,21 +1,15 @@
import os
import tempfile
import time
import numpy as np
import pytest
from fpdf import fpdf
from funcy import project, second
from funcy import project, second, compose
from pdf2image import pdf2image
from image_prediction.formatter.formatters.enum import EnumFormatter
from image_prediction.image_extractor.extractor import ImageMetadataPair
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
from image_prediction.info import Info
from image_prediction.transformer.transformers.coordinate.fitz import FitzCoordinateTransformer
from image_prediction.transformer.transformers.coordinate.fpdf import FPDFCoordinateTransformer
from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCoordinateTransformer
from image_prediction.utils.pdf_annotation import annotate_pdf, draw_metadata_box
from test.conftest import array_to_image, add_image
@ -54,6 +48,18 @@ def get_fitz_coordinates(corner):
return metadata
def get_pdfnet_coordinates(corner):
    """Build position metadata for a box at ``corner`` in PDFNet coordinates.

    The result starts out as ``base_position_metadata()``. For the corners
    ``"top_left"`` and ``"bottom_left"`` the box coordinates are overwritten;
    any other ``corner`` value returns the base metadata unchanged.

    NOTE(review): the y-values presumably assume a 300-unit-high test page
    with the origin in the bottom left corner -- confirm against the fixture
    that creates the test PDF.
    """
    metadata = base_position_metadata()

    # Only the vertical extent depends on the corner; x always spans 0..100.
    if corner == "top_left":
        y_lower, y_upper = 200, 300
    elif corner == "bottom_left":
        y_lower, y_upper = 0, 100
    else:
        # Unknown corner: hand back the untouched base metadata.
        return metadata

    metadata.update({Info.X1: 0, Info.Y1: y_lower, Info.X2: 100, Info.Y2: y_upper})
    return metadata
def base_position_metadata(width=100, height=100):
return {
Info.X1: 0,
@ -76,6 +82,9 @@ def position_metadata_in_given_system(corner, coordinate_system):
elif coordinate_system == "fitz":
return get_fitz_coordinates(corner)
elif coordinate_system == "pdfnet":
return get_pdfnet_coordinates(corner)
else:
raise ValueError(f"Unknown coordinate system: {coordinate_system}")
@ -99,13 +108,21 @@ def coordinate_test_page_image(coordinate_test_pdf):
@pytest.mark.parametrize("corner", ["top_left", "bottom_left"])
@pytest.mark.parametrize("coordinate_system", ["fpdf"])
def test_fpdf_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
assert FPDFCoordinateTransformer()(position_metadata_in_given_system) == position_metadata_in_reference_system
transformer = FPDFCoordinateTransformer()
assert transformer.forward(position_metadata_in_given_system) == position_metadata_in_reference_system
assert transformer.backward(position_metadata_in_reference_system) == position_metadata_in_given_system
assert compose(
transformer.backward,
transformer.forward
)(position_metadata_in_reference_system) == position_metadata_in_reference_system
@pytest.mark.parametrize("corner", ["top_left", "bottom_left"])
@pytest.mark.parametrize("coordinate_system", ["fitz"])
def test_fitz_coordinate_transformer(
position_metadata_in_given_system, position_metadata_in_reference_system, coordinate_test_pdf, coordinate_test_fpdf
position_metadata_in_given_system, position_metadata_in_reference_system, coordinate_test_pdf,
coordinate_test_fpdf
):
# coordinate_test_fpdf.output("/tmp/x.pdf")
# coordinate_test_page_image.show()
@ -114,23 +131,46 @@ def test_fitz_coordinate_transformer(
position_extracted = project(metadata_extracted, list(position_metadata_in_given_system.keys()))
assert position_extracted == position_metadata_in_given_system
assert FitzCoordinateTransformer()(position_metadata_in_given_system) == position_metadata_in_reference_system
transformer = FitzCoordinateTransformer()
assert transformer.forward(position_metadata_in_given_system) == position_metadata_in_reference_system
assert transformer.backward(position_metadata_in_reference_system) == position_metadata_in_given_system
assert compose(
transformer.backward,
transformer.forward
)(position_metadata_in_reference_system) == position_metadata_in_reference_system
@pytest.mark.parametrize("corner", ["bottom_left"])
@pytest.mark.parametrize("coordinate_system", ["fitz"])
def test_pdfnet_coordinate_transformer(
position_metadata_in_given_system, position_metadata_in_reference_system, coordinate_test_fpdf, coordinate_test_page_image
):
# coordinate_test_page_image.show()
# coordinate_test_fpdf.output("/tmp/xyz.pdf")
# @pytest.mark.parametrize("corner", ["top_left", "bottom_left"])
# @pytest.mark.parametrize("coordinate_system", ["fitz"])
# def test_pdfnet_coordinate_transformer(
# position_metadata_in_given_system, position_metadata_in_reference_system, coordinate_test_fpdf, coordinate_test_page_image
# ):
# # coordinate_test_page_image.show()
# # coordinate_test_fpdf.output("/tmp/xyz.pdf")
#
# position_metadata_transformed = PDFNetCoordinateTransformer().forward(position_metadata_in_reference_system)
# print(json.dumps(EnumFormatter()(position_metadata_transformed), indent=2))
#
# with tempfile.NamedTemporaryFile(suffix=".pdf") as f:
# coordinate_test_fpdf.output(f.name)
# draw_metadata_box(f.name, EnumFormatter()(position_metadata_transformed),
# # os.path.join("/tmp", os.path.basename(f.name.replace(".pdf", "_annotated.pdf")))
# "/tmp/bla.pdf"
# )
# input()
position_metadata_transformed = PDFNetCoordinateTransformer()(position_metadata_in_given_system)
@pytest.mark.parametrize("corner", ["top_left", "bottom_left"])
@pytest.mark.parametrize("coordinate_system", ["pdfnet"])
def test_pdfnet_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
transformer = PDFNetCoordinateTransformer()
with tempfile.NamedTemporaryFile(suffix=".pdf") as f:
coordinate_test_fpdf.output(f.name)
draw_metadata_box(f.name, EnumFormatter()(position_metadata_transformed),
# os.path.join("/tmp", os.path.basename(f.name.replace(".pdf", "_annotated.pdf")))
"/tmp/bla.pdf"
)
input()
assert transformer.forward(position_metadata_in_reference_system) == position_metadata_in_given_system
assert transformer.backward(position_metadata_in_given_system) == position_metadata_in_reference_system
assert compose(
transformer.backward,
transformer.forward
)(position_metadata_in_reference_system) == position_metadata_in_reference_system