Pull request #23: Add pdf2image module
Merge in RR/cv-analysis from add-pdf2image-module to master
Squashed commit of the following:
commit 13355e2dd006fae9ee05c2d00acbbc8b38fd1e8e
Merge: eaf4627 edbda58
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 13:35:27 2022 +0200
Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into add-pdf2image-module
commit eaf462768787642889d496203034d017c4ec959b
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 13:26:58 2022 +0200
update build scripts
commit d429c713f4e5e74afca81c2354e8125bf389b865
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 13:11:07 2022 +0200
purge target
commit 349b81c5db724bf70d6f31b58ded2b5414216bfe
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 13:07:58 2022 +0200
Revert "extinguish target"
This reverts commit d2bd4cefde0648d2487839b0344509b984435273.
commit d2bd4cefde0648d2487839b0344509b984435273
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 12:57:50 2022 +0200
extinguish target
commit 5f6cc713db31e3e16c8e7f13a59804c86b5d77d7
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 11:58:52 2022 +0200
refactor
commit 576019378a39b580b816d9eb7957774f1faf48b9
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 11:52:04 2022 +0200
add test for adjusted server analysis pipeline logic
commit bdf0121929d6941cbba565055f37df7970925c79
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 11:30:17 2022 +0200
update analysis pipeline logic to use imported pdf2image
commit f7cef98d5e6d7b95517bbd047dd3e958acebb3d8
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 2 11:04:34 2022 +0200
add pdf2image as git submodule
This commit is contained in:
parent edbda58837
commit 016abe46de

.gitmodules (vendored, +3)
@@ -1,3 +1,6 @@
[submodule "incl/pyinfra"]
	path = incl/pyinfra
	url = ssh://git@git.iqser.com:2222/rr/pyinfra.git
[submodule "incl/pdf2image"]
	path = incl/pdf2image
	url = ssh://git@git.iqser.com:2222/rr/pdf2image.git
@@ -13,8 +13,13 @@ RUN python3 -m pip install -r requirements.txt
COPY ./incl/pyinfra/requirements.txt ./incl/pyinfra/requirements.txt
RUN python -m pip install -r incl/pyinfra/requirements.txt

COPY ./incl/pdf2image/requirements.txt ./incl/pdf2image/requirements.txt
RUN python -m pip install -r incl/pdf2image/requirements.txt

COPY ./incl ./incl

RUN python3 -m pip install -e incl/pyinfra
RUN python3 -m pip install -e incl/pdf2image

COPY ./src ./src
COPY ./cv_analysis ./cv_analysis
@@ -12,6 +12,9 @@ echo "dev setup for unit test and coverage"
pip install -e incl/pyinfra
pip install -r incl/pyinfra/requirements.txt

pip install -e incl/pdf2image
pip install -r incl/pdf2image/requirements.txt

pip install -e .
pip install -r requirements.txt
Binary file not shown.
@@ -1,19 +0,0 @@
#!/bin/bash
set -e

if [[ "${bamboo_version_tag}" != "dev" ]]
then
  ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
    -f ${bamboo_build_working_directory}/pom.xml \
    versions:set \
    -DnewVersion=${bamboo_version_tag}

  ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
    -f ${bamboo_build_working_directory}/pom.xml \
    -B clean deploy \
    -e -DdeployAtEnd=true \
    -Dmaven.wagon.http.ssl.insecure=true \
    -Dmaven.wagon.http.ssl.allowall=true \
    -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
    -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
fi
@@ -1,19 +0,0 @@
#!/bin/bash
set -e

SERVICE_NAME=$1
SERVICE_NAME_BASE=$2
# TODO version tag on master push
python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

pip install dvc
pip install 'dvc[ssh]'
dvc pull

echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
docker build -f Dockerfile_base -t nexus.iqser.com:5001/red/$SERVICE_NAME_BASE:${bamboo_version_tag} .
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag} --build-arg VERSION_TAG=${bamboo_version_tag} .
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag}
@@ -1,9 +0,0 @@
#!/bin/bash
set -e

if [[ "${bamboo_version_tag}" = "dev" ]]
then
  echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
else
  echo "gitTag=${bamboo_version_tag}" > git.tag
fi
@@ -1,61 +0,0 @@
#!/bin/bash
set -e

export JAVA_HOME=/usr/bin/sonar-scanner/jre

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

pip install -e .
pip install -e incl/pyinfra

pip install -r incl/pyinfra/requirements.txt
pip install -r requirements.txt

echo "DVC pull step"
dvc pull

echo "coverage calculation"
coverage run -m pytest test
echo "coverage report generation"
coverage report -m
coverage xml

SERVICE_NAME=$1

echo "dependency-check:aggregate"
mkdir -p reports
dependency-check --enableExperimental -f JSON -f HTML -f XML \
  --disableAssembly -s . -o reports --project $SERVICE_NAME --exclude ".git/**" --exclude "venv/**" \
  --exclude "build_venv/**" --exclude "**/__pycache__/**" --exclude "bamboo-specs/**"

if [[ -z "${bamboo_repository_pr_key}" ]]
then
  echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
  /usr/bin/sonar-scanner/bin/sonar-scanner -X\
    -Dsonar.projectKey=RED_$SERVICE_NAME \
    -Dsonar.sources=src,cv_analysis \
    -Dsonar.host.url=https://sonarqube.iqser.com \
    -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
    -Dsonar.branch.name=${bamboo_planRepository_1_branch} \
    -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
    -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
    -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
    -Dsonar.python.coverage.reportPaths=reports/coverage.xml

else
  echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
  /usr/bin/sonar-scanner/bin/sonar-scanner \
    -Dsonar.projectKey=RED_$SERVICE_NAME \
    -Dsonar.sources=src,cv_analysis \
    -Dsonar.host.url=https://sonarqube.iqser.com \
    -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
    -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
    -Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
    -Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
    -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
    -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
    -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
    -Dsonar.python.coverage.reportPaths=reports/coverage.xml
fi
Binary file not shown.
@@ -1,44 +1,42 @@
from functools import partial
from typing import Callable
from itertools import starmap
from operator import truth
from typing import Callable, Iterator

from funcy import lmap

from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
from cv_analysis.layout_parsing import parse_layout
from cv_analysis.server.rotate import rotate_rectangle
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs
from cv_analysis.utils.structures import Rectangle
from pdf2img.conversion import convert_pdf_to_image_and_metadata_stream


def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
    """Make end-to-end pipeline to analyse a PDF with given analysis function.
    The pipeline returns a Generator of dicts containing page information and the analysis results.

    The pipeline streams dicts containing page information and the analysis results.
    Note:
        If there are no results on a page, the page is skipped in result stream
    Steps:
        Convert PDF to pairs of image and page information
        Analyse pages, get list of bounding boxes per page (e.g. table cells)
        Convert PDF to a stream of page as image and metadata (page information) tuples
        Analyse pages:
            Get list of bounding boxes per page (e.g. table cells)
            Convert pixel values to inches
            Rotate results if page is rotated
        Format results to stream of dictionaries with page information and analysis results
        Format results
    """

    def pipeline(pdf: bytes, index=None):
        image_metadata_pairs = pdf_to_image_metadata_pairs(pdf, index=index, dpi=dpi)
        results = map(image_metadata_pair_to_results, image_metadata_pairs)
        results_filtered = filter(lambda x: x["bboxes"], results)
        return results_filtered
    def analysis_pipeline(pdf: bytes, index=None) -> Iterator[dict]:
        image_metadata_stream = convert_pdf_to_image_and_metadata_stream(pdf, index=index, dpi=dpi)
        results = starmap(analyse_image_metadata_pair, image_metadata_stream)
        yield from filter(truth, results)

    def image_metadata_pair_to_results(image_metadata_pair):
        rectangles = analysis_fn(image_metadata_pair.image)
        rectangles = map(partial(pixel_rect_to_inches_rect, dpi=dpi), rectangles)
        if image_metadata_pair.metadata["rotation"] != 0:
            rotate_rectangle_fn = partial(rotate_rectangle, metadata=image_metadata_pair.metadata)
            rectangles = map(rotate_rectangle_fn, rectangles)
        bboxes = lmap(lambda x: x.json_xyxy(), rectangles)
        return {**image_metadata_pair.metadata, "bboxes": bboxes}
    def analyse_image_metadata_pair(image, metadata):
        rectangles = analysis_fn(image)
        rectangles = map(partial(convert_pixel_rect_to_inches_rect, dpi=dpi), rectangles)
        bboxes = lmap(lambda x: x.json_full(), rectangles)
        return {**metadata, "bboxes": bboxes} if bboxes else {}

    return pipeline
    return analysis_pipeline


def get_analysis_fn(analysis_type):
@@ -52,10 +50,9 @@ def get_analysis_fn(analysis_type):
        raise


def pixel_rect_to_inches_rect(rect, dpi):
    def convert_pixel_to_inch(pixel):
def convert_pixel_rect_to_inches_rect(rect, dpi):
    def pixel_to_inch(pixel):
        return pixel / dpi * 72

    bbox = rect.x1, rect.y1, rect.x2, rect.y2
    bbox_inches = tuple(map(convert_pixel_to_inch, bbox))
    bbox_inches = tuple(map(pixel_to_inch, rect.xyxy()))
    return Rectangle.from_xyxy(bbox_inches, discrete=False)
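
For orientation, a minimal usage sketch of the refactored pipeline, mirroring the new test/unit_tests/server_pipeline_test.py further down in this diff; the analysis function and input file are made-up placeholders, and the 200 dpi default comes from make_analysis_pipeline:

# Usage sketch (illustration only, not part of this commit).
from cv_analysis.server.pipeline import make_analysis_pipeline
from cv_analysis.utils.structures import Rectangle

def detect_boxes(image):
    # Hypothetical analysis function: one Rectangle per detected region,
    # in pixel coordinates of the rendered page image.
    return [Rectangle.from_xyxy((0, 0, 42, 42))]

pipeline = make_analysis_pipeline(detect_boxes, dpi=200)
with open("document.pdf", "rb") as f:  # hypothetical input PDF
    for page_result in pipeline(f.read()):  # dicts with page metadata and "bboxes"
        print(page_result["bboxes"])
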
@@ -1,107 +0,0 @@
from _operator import itemgetter

import numpy as np

from cv_analysis.utils.structures import Rectangle


def rotate_rectangle(rectangle, metadata):
    width, height, rotation = itemgetter("width", "height", "rotation")(metadata)
    rotation = rotation // 90 if rotation not in [0, 1, 2, 3] else rotation

    if rotation in [1, 3]:
        width, height = height, width

    x1, y1, x2, y2 = rectangle.xyxy()
    matrix = np.vstack([[x1, y1], [x2, y2]]).T
    new_matrix = rotate_and_shift(matrix, rotation, (width, height))

    x1, x2 = sorted(new_matrix[0, :])
    y1, y2 = sorted(new_matrix[1, :])

    return Rectangle.from_xyxy((x1, y1, x2, y2), discrete=False)


def rotate_and_shift(matrix, rotation, size, debug=False):
    """Rotates a matrix against (!) a specified rotation. That is, the rotation is applied negatively. The matrix is
    also shifted to ensure it contains points (columns) in quadrant I.

    Procedure:
        1) Rotate the matrix clockwise according to rotation value
        2) Shift the matrix back into quadrant I
        3) Set x_i and y_i to new lower left and upper right corners, since the corner vectors are no longer at these
           corners due to the rotation

    Args:
        matrix: matrix to transform
        rotation: any of 0, 1, 2, or 3, where 1 = 90 degree CLOCKWISE rotation etc.
        size: the size of the page as a tuple (<width>, <height>)
        debug: Visualizes the transformations for later re-understanding of the code
    """

    def shift_to_quadrant_1(matrix):

        # TODO: generalize
        if rotation == 0:
            back_shift = np.zeros_like(np.eye(2))
        elif rotation == 1:
            back_shift = np.array([[0, 0], [1, 1]]) * size[1]
        elif rotation == 2:
            back_shift = np.array([[1, 1], [1, 1]]) * size
        elif rotation == 3:
            back_shift = np.array([[1, 1], [0, 0]]) * size[0]
        else:
            raise ValueError(f"Unexpected rotation value '{rotation}'. Expected any of 0, 1, 2, or 3.")

        matrix_shifted = matrix + back_shift
        return matrix_shifted

    # PDF rotations are clockwise, hence subtract the radian value of the rotation from 2 pi
    radians = (2 * np.pi) - (np.pi * (rotation / 2))
    matrix_rotated = rotate(matrix, radians)
    matrix_rotated_and_shifted = shift_to_quadrant_1(matrix_rotated)

    if debug:
        __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted)
    return matrix_rotated_and_shifted


def __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted):

    import matplotlib.pyplot as plt
    from copy import deepcopy

    m1 = matrix
    m2 = matrix_rotated
    m3 = matrix_rotated_and_shifted

    m1, m2, m3 = map(deepcopy, (m1, m2, m3))

    frame = np.eye(2) * size
    frame_rotated = rotate(frame, radians)

    f1 = frame
    f2 = frame_rotated

    f1 *= 0.005 * 1
    f2 *= 0.005 * 1
    m1 *= 0.005 * 1
    m2 *= 0.005 * 1
    m3 *= 0.005 * 1

    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes = axes.ravel()

    axes[0].quiver([0, 0], [0, 0], f1[0, :], f1[1, :], scale=5, scale_units="inches", color="red")
    axes[1].quiver([0, 0], [0, 0], f2[0, :], f2[1, :], scale=5, scale_units="inches", color="red")
    axes[0].quiver([0, 0], [0, 0], m1[0, :], m1[1, :], scale=5, scale_units="inches")
    axes[1].quiver([0, 0], [0, 0], m2[0, :], m2[1, :], scale=5, scale_units="inches", color="green")
    axes[1].quiver([0, 0], [0, 0], m3[0, :], m3[1, :], scale=5, scale_units="inches", color="blue")

    plt.show()


def rotate(input_matrix, radians):
    rotation_matrix = np.vstack([[np.cos(radians), -np.sin(radians)], [np.sin(radians), np.cos(radians)]])

    return np.dot(rotation_matrix, input_matrix)
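
The deleted rotate.py undid the page rotation before reporting coordinates: PDF rotations are clockwise, so the corner matrix is rotated by 2π minus the rotation angle and then shifted back into quadrant I. A minimal numeric sketch of that math for rotation = 1 (90° clockwise), with made-up sample coordinates:

# Numeric sketch of the deleted rotate_and_shift() for rotation = 1 (illustration only).
import numpy as np

rotation = 1
size = (842.0, 595.0)  # sample (width, height), already swapped as the code does for rotations 1 and 3
radians = (2 * np.pi) - (np.pi * (rotation / 2))  # rotate against the clockwise page rotation

corners = np.array([[10.0, 20.0], [100.0, 200.0]]).T  # columns: (x1, y1) and (x2, y2)
rot = np.array([[np.cos(radians), -np.sin(radians)],
                [np.sin(radians), np.cos(radians)]])
rotated = rot @ corners
back_shift = np.array([[0, 0], [1, 1]]) * size[1]  # the back-shift used for rotation == 1
shifted = rotated + back_shift  # x/y are re-sorted afterwards to form the new rectangle
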
@@ -1,46 +0,0 @@
from dataclasses import dataclass
from functools import partial
from typing import Iterator

import fitz
import numpy as np


@dataclass
class ImageMetadataPair:
    image: np.ndarray
    metadata: dict


def pdf_to_image_metadata_pairs(pdf: bytes, index=None, dpi=200) -> Iterator[ImageMetadataPair]:
    """Streams PDF as pairs of image (matrix) and metadata.
    Note: If Index is not given or evaluates to None, the whole PDF will be processed.
    """
    convert_fn = partial(page_to_image_metadata_pair, dpi=dpi)
    yield from map(convert_fn, stream_pages(pdf, index))


def page_to_image_metadata_pair(page: fitz.Page, dpi):
    metadata = get_page_info(page)
    pixmap = page.get_pixmap(dpi=dpi)
    array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)

    return ImageMetadataPair(array, metadata)


def stream_pages(pdf: bytes, index=None) -> Iterator[fitz.Page]:
    with fitz.open(stream=pdf) as pdf_handle:
        if not index:
            yield from pdf_handle
        else:
            for i in index:
                yield pdf_handle[i]


def get_page_info(page):
    return {
        "index": page.number,
        "rotation": page.rotation,
        "width": page.rect.width,  # rotated page width in inches
        "height": page.rect.height,  # rotated page height in inches
    }
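
This helper is superseded by the external pdf2image submodule. Judging from the pipeline diff above, the replacement convert_pdf_to_image_and_metadata_stream yields (image, metadata) tuples rather than ImageMetadataPair objects, since it is consumed with itertools.starmap; a rough consumption sketch under that assumption:

# Assumption: the stream yields (image, metadata) tuples, as implied by the
# starmap call in the updated cv_analysis/server/pipeline.py.
from pdf2img.conversion import convert_pdf_to_image_and_metadata_stream

def iter_page_shapes(pdf_bytes: bytes, dpi: int = 200):
    for image, metadata in convert_pdf_to_image_and_metadata_stream(pdf_bytes, dpi=dpi):
        yield metadata, image.shape  # page info dict plus rendered (height, width, channels)
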
incl/pdf2image (submodule, +1)
@@ -0,0 +1 @@
Subproject commit d1a68b9e580ecbc0cd3050deeedc2d648b377232
@@ -1 +1 @@
Subproject commit 6c2652837a17a29476b11b1acbc35ba8825c2cd9
Subproject commit 0f6512df5423df98d334f5735170cd1f7642998a
@@ -1,24 +0,0 @@
import fitz
import numpy as np
import pytest

from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs


@pytest.fixture
def pdf(n_pages):
    doc = fitz.open()
    for n in range(n_pages):
        page = doc.new_page()
        where = fitz.Point(50, 100)
        page.insert_text(where, "De gustibus non est disputandum.", fontsize=30)
    return doc.write()


@pytest.mark.parametrize("n_pages", [1])
def test_pdf_to_array_and_metadata(pdf):
    for image_metadata_pair in pdf_to_image_metadata_pairs(pdf):
        assert isinstance(image_metadata_pair.image, np.ndarray)
        assert image_metadata_pair.image.shape == (2339, 1653, 3)  # Height, Width, Color channels

        assert isinstance(image_metadata_pair.metadata, dict)
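
For reference, the hard-coded shape in the deleted test follows from rendering a default (A4-sized, 595 x 842 point) PyMuPDF page at 200 dpi, with 72 points per inch:

# 842 / 72 * 200 ~= 2339 rows, 595 / 72 * 200 ~= 1653 columns, 3 colour channels
round(842 / 72 * 200), round(595 / 72 * 200)  # -> (2339, 1653)
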
test/unit_tests/server_pipeline_test.py (new file, +40)
@@ -0,0 +1,40 @@
import fitz
import numpy as np
import pytest

from cv_analysis.server.pipeline import make_analysis_pipeline
from cv_analysis.utils.structures import Rectangle


def analysis_fn_mock(image: np.ndarray):
    bbox = (0, 0, 42, 42)
    return [Rectangle.from_xyxy(bbox)]


@pytest.fixture
def empty_pdf(n_pages):
    doc = fitz.open()
    for n in range(n_pages):
        doc.new_page()
    return doc.write()


@pytest.fixture
def expected_formatted_analysis_result(n_pages):
    return [
        {
            "pageNumber": page_number,
            "rotation": 0,
            "width": 595.0,
            "height": 842.0,
            "bboxes": [{"x1": 0.0, "y1": 0.0, "x2": 15.12, "y2": 15.12, "width": 15.12, "height": 15.12}],
        }
        for page_number in range(n_pages)
    ]


@pytest.mark.parametrize("n_pages", [1, 2])
def test_analysis_pipeline(empty_pdf, expected_formatted_analysis_result):
    analysis_pipeline = make_analysis_pipeline(analysis_fn_mock)
    results = analysis_pipeline(empty_pdf)
    assert list(results) == expected_formatted_analysis_result
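
For reference, the 15.12 values in the expected result come from the mock bounding box (0, 0, 42, 42) in pixels being mapped back to PDF points by convert_pixel_rect_to_inches_rect (pixel / dpi * 72 at the default 200 dpi):

# 42 px / 200 dpi * 72 points per inch = 15.12
42 / 200 * 72  # -> 15.12
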