cv-analysis-service/cv_analysis/server/pipeline.py

from functools import partial
from typing import Callable

from funcy import lmap

from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
from cv_analysis.layout_parsing import parse_layout
from cv_analysis.server.rotate import rotate_rectangle
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs
from cv_analysis.utils.structures import Rectangle


def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
    """Build a callable that runs ``analysis_fn`` over the pages of a PDF.

    The returned ``pipeline(pdf, index=None)`` yields one dict per page, made of
    the page metadata plus a ``"bboxes"`` entry. Pages whose ``"bboxes"`` list is
    empty are dropped from the stream.

    Per page, the work is:
        1. Run ``analysis_fn`` on the page image to obtain rectangles.
        2. Rescale every rectangle with ``pixel_rect_to_inches_rect`` at ``dpi``.
        3. If the page metadata reports a non-zero ``"rotation"``, pass every
           rectangle through ``rotate_rectangle`` together with that metadata.
        4. Serialise every rectangle with ``json_xyxy()``.

    Args:
        analysis_fn: Callable taking a page image and returning an iterable of
            rectangles.
        dpi: Resolution used both for rendering the PDF pages and for rescaling
            the resulting rectangles.

    Returns:
        The ``pipeline`` function described above.
    """

    def analyse_page(page):
        # Run the analysis before touching the metadata, as the rotation lookup
        # is only needed once rectangles exist.
        found = analysis_fn(page.image)
        needs_rotation = page.metadata["rotation"] != 0

        bboxes = []
        for rect in found:
            rect = pixel_rect_to_inches_rect(rect, dpi=dpi)
            if needs_rotation:
                rect = rotate_rectangle(rect, metadata=page.metadata)
            bboxes.append(rect.json_xyxy())

        return {**page.metadata, "bboxes": bboxes}

    def pipeline(pdf: bytes, index=None):
        # The PDF conversion is started immediately; per-page analysis stays lazy
        # and only happens as the caller consumes the returned iterator.
        pages = pdf_to_image_metadata_pairs(pdf, index=index, dpi=dpi)
        return (result for result in map(analyse_page, pages) if result["bboxes"])

    return pipeline


def get_analysis_fn(analysis_type):
    """Return the analysis callable that belongs to ``analysis_type``.

    Args:
        analysis_type: Name of the analysis; one of ``"table"``, ``"layout"`` or
            ``"figure"``.

    Returns:
        ``parse_tables`` for ``"table"``, ``parse_layout`` for ``"layout"``, or the
        result of ``make_figure_detection_pipeline()`` for ``"figure"``.

    Raises:
        ValueError: If ``analysis_type`` is none of the supported names.
    """
    if analysis_type == "table":
        return parse_tables
    if analysis_type == "layout":
        return parse_layout
    if analysis_type == "figure":
        # Constructed on demand, so the figure pipeline is only built when asked for.
        return make_figure_detection_pipeline()

    # A bare ``raise`` has no active exception to re-raise here and would surface
    # as an unrelated ``RuntimeError``; report the offending value explicitly.
    raise ValueError(
        f"Unknown analysis type {analysis_type!r}; expected 'table', 'layout' or 'figure'."
    )


def pixel_rect_to_inches_rect(rect, dpi):
    """Rescale a rectangle's pixel coordinates using the rendering resolution.

    Each of ``rect.x1``, ``rect.y1``, ``rect.x2`` and ``rect.y2`` is divided by
    ``dpi`` and multiplied by 72; the results are handed to
    ``Rectangle.from_xyxy`` with ``discrete=False``.

    NOTE(review): the factor 72 suggests the output unit is points (1/72 inch)
    rather than inches as the name implies - confirm against callers.

    Args:
        rect: Object exposing ``x1``, ``y1``, ``x2`` and ``y2`` in pixels.
        dpi: Resolution the pixel coordinates were measured at.

    Returns:
        The ``Rectangle`` built from the rescaled coordinates.
    """
    corners = (rect.x1, rect.y1, rect.x2, rect.y2)
    # Keep the divide-then-multiply order so floating point results are unchanged.
    scaled = tuple(value / dpi * 72 for value in corners)
    return Rectangle.from_xyxy(scaled, discrete=False)