33 lines
1002 B
Python
33 lines
1002 B
Python
from cv_analysis.table_parsing import parse_tables
|
|
from cv_analysis.redaction_detection import find_redactions
|
|
from cv_analysis.layout_parsing import parse_layout
|
|
from cv_analysis.figure_detection import detect_figures
|
|
from cv_analysis.utils.preprocessing import open_img_from_bytes
|
|
|
|
|
|
# Dispatch table: task name -> analysis callable invoked on a decoded page image.
task_dict = dict(
    table=parse_tables,
    figure=detect_figures,
    layout=parse_layout,
    redaction=find_redactions,
)
|
|
|
|
|
|
def analyze_bytes(img_bytes, page_num, task="table"):
    """Analyze a single page image given as raw bytes.

    The analysis callable registered under ``task`` in ``task_dict`` is run
    on the image produced by ``open_img_from_bytes(img_bytes)``; each item it
    yields is converted with its ``json_xywh()`` method.

    Args:
        img_bytes: Encoded image data handed to ``open_img_from_bytes``.
        page_num: Page identifier copied verbatim into the result.
        task: Key into ``task_dict`` selecting the analysis to run.

    Returns:
        A dict with the keys ``"page"``, ``"pageWidth"``, ``"pageHeight"``
        and ``"cells"``.

    Raises:
        KeyError: If ``task`` is not a key of ``task_dict``.
    """
    # Resolve the task first so an unknown task fails before any decoding.
    run_analysis = task_dict[task]
    image = open_img_from_bytes(img_bytes)
    boxes = [found.json_xywh() for found in run_analysis(image)]

    # NOTE(review): presumably ``image`` is array-like with shape
    # (height, width, ...) - confirm against open_img_from_bytes.
    width = image.shape[1]
    height = image.shape[0]

    return {
        "page": page_num,
        "pageWidth": width,
        "pageHeight": height,
        "cells": boxes,
    }
|
|
|
|
|
|
def analyze_bytes_list(img_bytes_list, task="table"):
    """Analyze every page image in ``img_bytes_list``.

    Each element is passed to ``analyze_bytes`` together with its zero-based
    position in the iterable (used as the page number) and the given ``task``.

    Args:
        img_bytes_list: Iterable of encoded page images.
        task: Task name forwarded to ``analyze_bytes``.

    Returns:
        A list with one ``analyze_bytes`` result per input element, in input
        order; empty when the input is empty.
    """
    return [
        analyze_bytes(page_bytes, index, task=task)
        for index, page_bytes in enumerate(img_bytes_list)
    ]