cv-analysis-service/cv_analysis/fig_detection_with_layout.py

from cv_analysis.layout_parsing import annotate_layout_in_pdf
from cv_analysis.figure_detection import detect_figures
from cv_analysis.table_parsing import tables_in_image, parse_table
from cv_analysis.utils.draw import draw_rectangles
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.visual_logging import vizlogger
#from PIL import Image


def cut_out_content_structures(layout_rects, page, min_large_area=75000):
    """Crop every layout rectangle out of the page and bucket the crops by area.

    Args:
        layout_rects: Iterable of ``(x, y, w, h)`` rectangles.
        page: Page image that supports 2-D slicing ``page[rows, cols]``
            (presumably a NumPy array -- TODO confirm against the caller).
        min_large_area: Smallest area (``w * h``) for a rectangle to be
            treated as "large". Defaults to 75000, the value that used to be
            hard-coded here.

    Returns:
        A ``(large_rects, small_rects)`` tuple. Each is a list of
        ``[rect, cropped_page]`` pairs in input order, where ``rect`` is the
        ``(x, y, w, h)`` tuple and ``cropped_page`` is
        ``page[y:y + h, x:x + w]``.
    """
    large_rects = []
    small_rects = []
    for x, y, w, h in layout_rects:
        # The crop is the same for both buckets: rows are selected by y,
        # columns by x.
        entry = [(x, y, w, h), page[y:y + h, x:x + w]]
        bucket = large_rects if w * h >= min_large_area else small_rects
        bucket.append(entry)
    return large_rects, small_rects


def parse_content_structures(page, large_rects, small_rects):
    """Annotate each large layout region on the page according to its content.

    For every ``[coordinates, cropped_image]`` pair in ``large_rects``:

    * if ``detect_figures`` finds nothing in the crop, the region is outlined
      on the page with color ``(0, 255, 0)`` (treated as text);
    * otherwise, if ``parse_table`` returns a non-empty result, those
      rectangles are drawn onto the crop with color ``(255, 0, 0)`` and the
      annotated crop is written back into the page;
    * otherwise the region is outlined on the page with color
      ``(0, 0, 255)`` (treated as a figure).

    Args:
        page: Page image supporting 2-D slice assignment.
        large_rects: Iterable of ``[(x, y, w, h), cropped_image]`` pairs.
        small_rects: Accepted for interface compatibility; small regions are
            currently not processed.

    Returns:
        The annotated page.
    """
    for coordinates, cropped_image in large_rects:
        # len() rather than truthiness: the helpers' return types are not
        # visible here, and truthiness is ambiguous for array-like results.
        if len(detect_figures(cropped_image)) == 0:  # text
            page = draw_rectangles(page, [coordinates], color=(0, 255, 0), annotate=True)
            continue

        # Parse the table once and reuse the result instead of running
        # parse_table a second time on the same crop.
        stats = parse_table(cropped_image)
        if len(stats) > 0:  # table
            cropped_image = draw_rectangles(cropped_image, stats, color=(255, 0, 0), annotate=True)
            x, y, w, h = coordinates
            page[y:y + h, x:x + w] = cropped_image
        else:  # figure
            page = draw_rectangles(page, [coordinates], color=(0, 0, 255), annotate=True)

    return page


def detect_figures_with_layout_parsing(pdf_path, page_index=1, show=False):
    """Run layout parsing on one PDF page and annotate its large regions.

    The layout rectangles and page image come from ``annotate_layout_in_pdf``;
    the rectangles are cropped and bucketed by
    ``cut_out_content_structures`` and then annotated by
    ``parse_content_structures``. The final image is passed to ``vizlogger``
    under the name ``"figures03_final.png"``.

    Args:
        pdf_path: Path of the PDF, forwarded to ``annotate_layout_in_pdf``.
        page_index: Page selector forwarded to ``annotate_layout_in_pdf``
            (NOTE(review): whether this is 0- or 1-based is not visible
            here -- confirm).
        show: When true, display the result with ``show_mpl`` instead of
            returning it.

    Returns:
        The annotated page when ``show`` is false; ``None`` when ``show`` is
        true.
    """
    rects, page_image = annotate_layout_in_pdf(pdf_path, page_index, return_rects=True)
    large, small = cut_out_content_structures(rects, page_image)
    annotated = parse_content_structures(page_image, large, small)
    vizlogger.debug(annotated, "figures03_final.png")

    if not show:
        return annotated

    show_mpl(annotated)
    return None