refactoring

This commit is contained in:
Matthias Bisping 2022-04-04 13:12:08 +02:00
parent 0976971117
commit 1ffc9dcc68

View File

@ -12,15 +12,67 @@ from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCo
from test.conftest import array_to_image, add_image
@pytest.fixture
def coordinate_test_image():
    """Provide an all-zero array of shape (100, 100, 3), converted with ``array_to_image``."""
    blank_pixels = np.zeros(shape=(100, 100, 3))
    return array_to_image(blank_pixels)
@pytest.mark.parametrize("coordinate_system", ["fpdf"])
def test_fpdf_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
    """FPDF's coordinate system is (arbitrarily) chosen as the reference system, so
    FPDFCoordinateTransformer is not expected to change anything. This test merely
    documents that FPDF is the reference system.
    """
    transformer = FPDFCoordinateTransformer()
    given = position_metadata_in_given_system
    reference = position_metadata_in_reference_system

    assert transformer.forward(given) == reference
    assert transformer.backward(reference) == given

    # The composition of both directions has to reproduce the reference metadata.
    round_trip = compose(transformer.backward, transformer.forward)
    assert round_trip(reference) == reference
@pytest.mark.parametrize("coordinate_system", ["fitz"])
def test_fitz_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
    """How the transformation was inferred:
    - extract the images from coordinate_test_fpdf with ParsablePDFImageExtractor
      (see test_parsable_pdf_image_extractor)
    - compare position_metadata_in_given_system (fitz) with
      position_metadata_in_reference_system (fpdf)
    - observe that both are identical
    """
    transformer = FitzCoordinateTransformer()
    given = position_metadata_in_given_system
    reference = position_metadata_in_reference_system

    assert transformer.forward(given) == reference
    assert transformer.backward(reference) == given

    # The composition of both directions has to reproduce the reference metadata.
    round_trip = compose(transformer.backward, transformer.forward)
    assert round_trip(reference) == reference
@pytest.mark.parametrize("coordinate_system", ["pdfnet"])
def test_pdfnet_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
    """How the transformation was inferred:
    - save coordinate_test_fpdf to disk as file f
    - draw the boxes for position_metadata_in_reference_system into f with draw_metadata_box
    - save the annotated pdf as file g
    - inspect the discrepancy between the black square and the red box in g
    """
    transformer = PDFNetCoordinateTransformer()
    given = position_metadata_in_given_system
    reference = position_metadata_in_reference_system

    # Unlike the fpdf/fitz tests, forward maps reference -> given here.
    assert transformer.forward(reference) == given
    assert transformer.backward(given) == reference

    # The composition of both directions has to reproduce the reference metadata.
    round_trip = compose(transformer.backward, transformer.forward)
    assert round_trip(reference) == reference
@pytest.fixture
def coordinate_test_fpdf():
    """Provide a new ``fpdf.FPDF`` document created with unit "pt" and format (300, 300)."""
    return fpdf.FPDF(unit="pt", format=(300, 300))
@pytest.fixture
def position_metadata_in_given_system(corner, corner2metadata_in_given_system):
    """Return the position metadata of ``corner`` in the coordinate system under test.

    Must be registered as a fixture: the transformer tests request it by
    parameter name, and its arguments are fixtures themselves.
    """
    return corner2metadata_in_given_system[corner]
@pytest.fixture
def position_metadata_in_reference_system(corner, corner2metadata_in_reference_system):
    """Return the position metadata of ``corner`` in the reference coordinate system."""
    metadata_for_corner = corner2metadata_in_reference_system[corner]
    return metadata_for_corner
@pytest.fixture(params=["top_left", "bottom_left", "bottom_right", "top_right"])
@ -28,11 +80,27 @@ def corner(request):
return request.param
def set_coords(metadata, x1, y1, x2, y2):
    """Store x1, y1, x2, y2 under the Info.X1/Y1/X2/Y2 keys of ``metadata``, in place."""
    coordinates = {Info.X1: x1, Info.Y1: y1, Info.X2: x2, Info.Y2: y2}
    metadata.update(coordinates)
@pytest.fixture
def corner2metadata_in_given_system(coordinate_system):
    """Return the per-corner metadata for the coordinate system named by ``coordinate_system``.

    Raises:
        ValueError: if ``coordinate_system`` is none of "fpdf", "fitz" or "pdfnet".
    """
    if coordinate_system == "fpdf":
        return get_fpdf_corner_metadat()
    if coordinate_system == "fitz":
        return get_fitz_corner_metadat()
    if coordinate_system == "pdfnet":
        return get_pdfnet_corner_metadata()
    raise ValueError(f"Unknown coordinate system: {coordinate_system}")
def get_fpdf_coordinates(corner):
@pytest.fixture
def corner2metadata_in_reference_system():
    """Return the per-corner metadata of the reference system (the FPDF one)."""
    reference_metadata = get_fpdf_corner_metadat()
    return reference_metadata
def get_fpdf_corner_metadat():
"""Origin top left, y1 <= y2; all coords on page are positive
(0,0)--+--(2,0)--+
|////| |////|
@ -42,31 +110,19 @@ def get_fpdf_coordinates(corner):
|////| |////|
+--(1,3) +--(3,3)
"""
metadata = base_position_metadata()
if corner == "top_left":
set_coords(metadata, 0, 0, 100, 100)
elif corner == "bottom_left":
set_coords(metadata, 0, 200, 100, 300)
elif corner == "bottom_right":
set_coords(metadata, 200, 200, 300, 300)
elif corner == "top_right":
set_coords(metadata, 200, 0, 300, 100)
else:
raise ValueError(f"No coordinates specified for corner {corner}.")
return metadata
return {
"top_left": get_metadata_for_coords(0, 0, 100, 100),
"bottom_left": get_metadata_for_coords(0, 200, 100, 300),
"bottom_right": get_metadata_for_coords(200, 200, 300, 300),
"top_right": get_metadata_for_coords(200, 0, 300, 100)
}
def get_fitz_coordinates(corner):
    """Return the fitz coordinates for ``corner`` by delegating to ``get_fpdf_coordinates``."""
    fpdf_coordinates = get_fpdf_coordinates(corner)
    return fpdf_coordinates
def get_fitz_corner_metadat():
    """Return the fitz per-corner metadata by delegating to ``get_fpdf_corner_metadat``."""
    fpdf_corner_metadata = get_fpdf_corner_metadat()
    return fpdf_corner_metadata
def get_pdfnet_coordinates(corner):
def get_pdfnet_corner_metadata():
"""Origin bottom left, y1 <= y2; all coords on page are positive
+---(1,3) +--(3,3)
|////| |////|
@ -76,23 +132,17 @@ def get_pdfnet_coordinates(corner):
|////| |////|
(0,0)--+ (2,0)--+
"""
return {
"top_left": get_metadata_for_coords(0, 200, 100, 300),
"bottom_left": get_metadata_for_coords(0, 0, 100, 100),
"bottom_right": get_metadata_for_coords(200, 0, 300, 100),
"top_right": get_metadata_for_coords(200, 200, 300, 300),
}
def get_metadata_for_coords(x1, y1, x2, y2):
    """Build base position metadata and set Info.X1/Y1/X2/Y2 to the given values.

    The former per-corner if/elif chain is dropped: it branched on ``corner``,
    which is not a parameter of this function, and the coordinates are supplied
    by the caller anyway.
    """
    metadata = base_position_metadata()
    metadata.update({Info.X1: x1, Info.Y1: y1, Info.X2: x2, Info.Y2: y2})
    return metadata
@ -110,32 +160,7 @@ def base_position_metadata(width=100, height=100):
}
@pytest.fixture(params=["list", "dict"])
def coordinate_type(request):
    """Parametrized fixture that hands out the current parameter ("list" or "dict")."""
    current_type = request.param
    return current_type
@pytest.fixture
def position_metadata_in_given_system(corner, coordinate_system, coordinate_type):
    """Return the coordinates of ``corner`` in the given coordinate system.

    For ``coordinate_type == "list"`` the same coordinates object is returned
    twice inside a list; otherwise the coordinates are returned as they are.

    Raises:
        ValueError: if ``coordinate_system`` is none of "fpdf", "fitz" or "pdfnet".
    """
    if coordinate_system == "fpdf":
        lookup = get_fpdf_coordinates
    elif coordinate_system == "fitz":
        lookup = get_fitz_coordinates
    elif coordinate_system == "pdfnet":
        lookup = get_pdfnet_coordinates
    else:
        raise ValueError(f"Unknown coordinate system: {coordinate_system}")

    coordinates = lookup(corner)
    if coordinate_type == "list":
        return [coordinates, coordinates]
    return coordinates
@pytest.fixture
def position_metadata_in_reference_system(corner, coordinate_type):
    """Return the FPDF (reference) coordinates of ``corner``.

    For ``coordinate_type == "list"`` the same coordinates object is returned
    twice inside a list; otherwise the coordinates are returned as they are.
    """
    coordinates = get_fpdf_coordinates(corner)
    if coordinate_type == "list":
        return [coordinates, coordinates]
    return coordinates
# Utilities that the tests themselves do not need, but that help when inferring new coordinate systems:
@pytest.fixture
@ -144,60 +169,22 @@ def coordinate_test_pdf(position_metadata_in_given_system, coordinate_test_image
return coordinate_test_fpdf.output(dest="S").encode("latin1")
@pytest.fixture
def coordinate_test_image():
    """Provide the image that ``array_to_image`` builds from a zero array of shape (100, 100, 3)."""
    zeros = np.zeros(shape=(100, 100, 3))
    image = array_to_image(zeros)
    return image
@pytest.fixture
def coordinate_test_fpdf():
    """Provide an ``fpdf.FPDF`` document constructed with unit "pt" and format (300, 300)."""
    page_format = (300, 300)
    return fpdf.FPDF(unit="pt", format=page_format)
@pytest.fixture
def coordinate_test_page_image(coordinate_test_pdf):
    """Convert the test PDF bytes with ``pdf2image`` and return the first resulting image."""
    page_images = pdf2image.convert_from_bytes(coordinate_test_pdf)
    return page_images[0]
@pytest.mark.parametrize("coordinate_system", ["fpdf"])
def test_fpdf_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
    """The FPDF coordinate system is used as the reference system (an arbitrary choice).
    FPDFCoordinateTransformer therefore does not actually do anything; this test
    only documents that FPDF is the reference system.
    """
    transformer = FPDFCoordinateTransformer()

    forwarded = transformer.forward(position_metadata_in_given_system)
    assert forwarded == position_metadata_in_reference_system

    reverted = transformer.backward(position_metadata_in_reference_system)
    assert reverted == position_metadata_in_given_system

    # Round trip through both directions must give back the reference metadata.
    there_and_back = compose(transformer.backward, transformer.forward)
    assert there_and_back(position_metadata_in_reference_system) == position_metadata_in_reference_system
@pytest.mark.parametrize("coordinate_system", ["fitz"])
def test_fitz_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
    """Steps used to infer the transformation:
    - extract images from coordinate_test_fpdf using ParsablePDFImageExtractor
      (see test_parsable_pdf_image_extractor)
    - compare position_metadata_in_given_system (fitz) against
      position_metadata_in_reference_system (fpdf)
    - note that the two are identical
    """
    transformer = FitzCoordinateTransformer()

    forwarded = transformer.forward(position_metadata_in_given_system)
    assert forwarded == position_metadata_in_reference_system

    reverted = transformer.backward(position_metadata_in_reference_system)
    assert reverted == position_metadata_in_given_system

    # Round trip through both directions must give back the reference metadata.
    there_and_back = compose(transformer.backward, transformer.forward)
    assert there_and_back(position_metadata_in_reference_system) == position_metadata_in_reference_system
@pytest.mark.parametrize("coordinate_system", ["pdfnet"])
def test_pdfnet_coordinate_transformer(position_metadata_in_given_system, position_metadata_in_reference_system):
    """Steps used to infer the transformation:
    - write coordinate_test_fpdf to disk as file f
    - use draw_metadata_box to draw the boxes for position_metadata_in_reference_system in f
    - store the annotated pdf as file g
    - examine the discrepancy between the black square and the red box in g
    """
    transformer = PDFNetCoordinateTransformer()

    # Here forward takes reference metadata and is compared against the given system.
    forwarded = transformer.forward(position_metadata_in_reference_system)
    assert forwarded == position_metadata_in_given_system

    reverted = transformer.backward(position_metadata_in_given_system)
    assert reverted == position_metadata_in_reference_system

    # Round trip through both directions must give back the reference metadata.
    there_and_back = compose(transformer.backward, transformer.forward)
    assert there_and_back(position_metadata_in_reference_system) == position_metadata_in_reference_system
@pytest.fixture(params=["list", "dict"])
def coordinate_type(request):
    """Parametrized fixture returning the active parameter, one of "list" or "dict"."""
    selected = request.param
    return selected