Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into clean_cv
This commit is contained in:
commit
e3f06da823
@ -38,14 +38,14 @@ def make_analysis_pipeline(analysis_fn, formatter, dpi):
|
||||
return analyse_pipeline
|
||||
|
||||
|
||||
def table_parsing_formatter(rects, page, dpi):
|
||||
def table_parsing_formatter(rects, page: ImagePlus, dpi):
|
||||
def format_rect(rect: Rectangle):
|
||||
rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
|
||||
return rect_plus.asdict(derotate=True)
|
||||
|
||||
bboxes = lmap(format_rect, rects)
|
||||
|
||||
return {"pageInfo": page.asdict(), "tableCells": bboxes}
|
||||
return {"pageInfo": page.asdict(natural_index=True), "tableCells": bboxes}
|
||||
|
||||
|
||||
def figure_detection_formatter(rects, page, dpi):
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from funcy import lmap
|
||||
from funcy import lmap, lfilter
|
||||
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
from cv_analysis.utils.postprocessing import remove_isolated # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
|
||||
@ -106,8 +106,11 @@ def turn_connected_components_into_rects(image: np.array):
|
||||
|
||||
_, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
|
||||
|
||||
stats = np.vstack(list(filter(is_large_enough, stats)))
|
||||
return stats[:, :-1][2:]
|
||||
stats = lfilter(is_large_enough, stats)
|
||||
if stats:
|
||||
stats = np.vstack(stats)
|
||||
return stats[:, :-1][2:]
|
||||
return []
|
||||
|
||||
|
||||
def parse_tables(image: np.array, show=False):
|
||||
|
||||
@ -1 +1 @@
|
||||
Subproject commit 9bb5a86310f065b852e16679cf37d5c939c0cacd
|
||||
Subproject commit f7292c30ad7c7ae5f07cee6925adda096301b60a
|
||||
@ -1 +1 @@
|
||||
Subproject commit 88b4c5c7ce9852b8aa4bdd6b760f4c8b708df62b
|
||||
Subproject commit 94beb544fac425257ab2ba9e9ad4ad53abc32c71
|
||||
@ -8,21 +8,22 @@ from cv_analysis.server.pipeline import get_analysis_pipeline
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("pdf")
|
||||
parser.add_argument("--type", "-t", choices=["table", "layout", "figure"], required=True)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
|
||||
detect_figures = get_analysis_pipeline("figure")
|
||||
analysis_fn = get_analysis_pipeline(args.type)
|
||||
|
||||
with open(args.pdf, "rb") as f:
|
||||
pdf_bytes = f.read()
|
||||
|
||||
results = list(detect_figures(pdf_bytes))
|
||||
results = list(analysis_fn(pdf_bytes))
|
||||
|
||||
folder = Path(args.pdf).parent
|
||||
file_stem = Path(args.pdf).stem
|
||||
|
||||
with open(f"{folder}/{file_stem}_figures.json", "w+") as f:
|
||||
json.dump(results, f, indent=2)
|
||||
with open(f"{folder}/{file_stem}_{args.type}.json", "w+") as f:
|
||||
json.dump(results, f, indent=2)
|
||||
@ -24,8 +24,17 @@ def expected_formatted_analysis_result(operation):
|
||||
if operation == "table":
|
||||
return [
|
||||
{
|
||||
"pageInfo": {"number": 0, "rotation": 0, "width": 595.0, "height": 842.0},
|
||||
"tableCells": [{"x0": 0.0, "y0": 0.0, "x1": 15.12, "y1": 15.12, "width": 15.12, "height": 15.12}],
|
||||
"pageInfo": {"number": 1, "rotation": 0, "width": 595.0, "height": 842.0},
|
||||
"tableCells": [
|
||||
{
|
||||
"x0": 0.0,
|
||||
"y0": 826.8800048828125,
|
||||
"x1": 15.119999885559082,
|
||||
"y1": 842.0,
|
||||
"width": 15.119999885559082,
|
||||
"height": 15.1199951171875,
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
if operation == "figure":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user