62 lines
2.4 KiB
Python
62 lines
2.4 KiB
Python
from functools import partial
|
|
from typing import Callable
|
|
|
|
from funcy import lmap
|
|
|
|
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
|
|
from cv_analysis.layout_parsing import parse_layout
|
|
from cv_analysis.server.rotate import rotate_rectangle
|
|
from cv_analysis.table_parsing import parse_tables
|
|
from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs
|
|
from cv_analysis.utils.structures import Rectangle
|
|
|
|
|
|
def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
    """Build an end-to-end PDF analysis pipeline around ``analysis_fn``.

    The returned ``pipeline(pdf, index=None)`` callable yields one dict per
    page, lazily. Each dict holds the page metadata plus a ``"bboxes"`` list
    with the analysis results for that page. Pages whose ``"bboxes"`` list is
    empty are dropped from the stream.

    Per-page processing:
        1. Render the PDF into (image, metadata) pairs at ``dpi``.
        2. Run ``analysis_fn`` on the page image to obtain rectangles
           (e.g. table cells).
        3. Rescale every rectangle from pixel units with
           ``pixel_rect_to_inches_rect``.
        4. If the page metadata reports a non-zero ``"rotation"``, pass each
           rectangle through ``rotate_rectangle``.
        5. Serialise the rectangles with ``json_xyxy()`` and merge them with
           the page metadata.

    Args:
        analysis_fn: Callable that receives a page image and returns an
            iterable of rectangle objects.
        dpi: Resolution used for rendering the PDF and for rescaling the
            detected rectangles.

    Returns:
        The ``pipeline`` function described above.
    """

    def analyse_page(page):
        # The analysis itself runs right away; the per-rectangle conversions
        # below stay lazy until they are collected into the final list.
        detected = analysis_fn(page.image)
        converted = (pixel_rect_to_inches_rect(rect, dpi=dpi) for rect in detected)

        page_info = page.metadata
        if page_info["rotation"] != 0:
            converted = (rotate_rectangle(rect, metadata=page_info) for rect in converted)

        serialised = [rect.json_xyxy() for rect in converted]
        return {**page_info, "bboxes": serialised}

    def pipeline(pdf: bytes, index=None):
        pages = pdf_to_image_metadata_pairs(pdf, index=index, dpi=dpi)
        # Lazily analyse pages and keep only those with at least one bbox.
        return (result for result in map(analyse_page, pages) if result["bboxes"])

    return pipeline
|
|
|
|
|
|
def get_analysis_fn(analysis_type):
    """Return the analysis function registered for ``analysis_type``.

    Args:
        analysis_type: One of ``"table"``, ``"layout"`` or ``"figure"``.

    Returns:
        ``parse_tables`` for ``"table"``, ``parse_layout`` for ``"layout"``,
        or a pipeline freshly created by ``make_figure_detection_pipeline()``
        for ``"figure"``.

    Raises:
        ValueError: If ``analysis_type`` is not one of the supported values.
    """
    if analysis_type == "table":
        return parse_tables
    if analysis_type == "layout":
        return parse_layout
    if analysis_type == "figure":
        # Constructed on demand, so it is only built when actually requested.
        return make_figure_detection_pipeline()

    # A bare ``raise`` here would either fail with an unrelated
    # "No active exception to re-raise" RuntimeError or, when called from
    # inside an ``except`` block, re-raise whatever the caller was handling.
    raise ValueError(
        f"Unknown analysis type {analysis_type!r}; expected 'table', 'layout' or 'figure'."
    )
|
|
|
|
|
|
def pixel_rect_to_inches_rect(rect, dpi):
    """Rescale a pixel-space rectangle using the rendering resolution.

    Each coordinate is divided by ``dpi`` and multiplied by 72.

    NOTE(review): despite the function name, the result is expressed in units
    of 1/72 inch (presumably PDF points), not whole inches. Confirm against
    the consumers before renaming or changing the factor.

    Args:
        rect: Object exposing ``x1``, ``y1``, ``x2`` and ``y2`` in pixels.
        dpi: Resolution (pixels per inch) the coordinates were measured at.

    Returns:
        A non-discrete ``Rectangle`` built from the rescaled coordinates.
    """
    corners = (rect.x1, rect.y1, rect.x2, rect.y2)
    # Keep the divide-then-multiply order so float results stay unchanged.
    rescaled = tuple(value / dpi * 72 for value in corners)
    return Rectangle.from_xyxy(rescaled, discrete=False)
|