76 lines
2.2 KiB
Python
76 lines
2.2 KiB
Python
import argparse
|
|
|
|
import loguru
|
|
|
|
from cv_analysis.figure_detection.figure_detection import detect_figures
|
|
from cv_analysis.layout_parsing import parse_layout
|
|
from cv_analysis.redaction_detection import find_redactions
|
|
from cv_analysis.table_parsing import parse_tables
|
|
from cv_analysis.utils.display import show_image
|
|
from cv_analysis.utils.drawing import draw_contours, draw_rectangles
|
|
from cv_analysis.utils.input import open_analysis_input_file
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line options of the annotation script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is what the parameterless call did before.

    Returns:
        The ``argparse.Namespace`` with the attributes ``pdf_path``,
        ``first_page``, ``last_page``, ``type`` and ``page``.
    """
    parser = argparse.ArgumentParser(
        description="Annotate PDF pages with detected elements. Specified pages form a closed interval and are 1-based."
    )
    parser.add_argument("pdf_path")
    parser.add_argument(
        "--first_page",
        "-f",
        type=int,
        default=1,
    )
    parser.add_argument(
        # "--last_page" mirrors "--first_page". The single-dash "-last_page"
        # spelling was the only long form accepted before, so it stays as an
        # alias to keep existing invocations working.
        "--last_page",
        "-last_page",
        "-l",
        help="if not specified, defaults to the value of the first page specified",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--type",
        "-t",
        help="element type to look for and analyze",
        choices=["table", "redaction", "layout", "figure"],
        default="table",
    )
    # NOTE(review): "--page" is parsed but not read by main() in this script;
    # kept so that command lines passing it are still accepted.
    parser.add_argument("--page", "-p", type=int, default=1)
    return parser.parse_args(argv)
|
|
|
|
|
|
def annotate_page(page_image, analysis_fn, draw_fn):
    """Analyze one page image, draw the findings onto it and display it.

    ``analysis_fn`` is called with the page image; its result is handed to
    ``draw_fn`` together with the same image, and whatever ``draw_fn``
    returns is passed to ``show_image``. Nothing is returned.
    """
    detected = analysis_fn(page_image)
    annotated = draw_fn(page_image, detected)
    show_image(annotated)
|
|
|
|
|
|
def get_analysis_and_draw_fn_for_type(element_type):
    """Return the ``(analysis_fn, draw_fn)`` pair registered for an element type.

    Supported keys are "table", "redaction", "layout" and "figure"; any other
    value raises ``KeyError`` from the dictionary lookup.
    """
    # Each entry pairs the detector with the drawing routine used for its output.
    dispatch = {
        "table": (parse_tables, draw_rectangles),
        "redaction": (find_redactions, draw_contours),
        "layout": (parse_layout, draw_rectangles),
        "figure": (detect_figures, draw_rectangles),
    }
    analyzer, painter = dispatch[element_type]
    return analyzer, painter
|
|
|
|
|
|
def main(args):
    """Annotate and display every requested page of the input file.

    Args:
        args: Parsed options; this function reads ``type``, ``pdf_path``,
            ``first_page`` and ``last_page`` from it.

    Logs the requested element type and path, opens the pages via
    ``open_analysis_input_file`` and passes each page to ``annotate_page``.
    """
    loguru.logger.info(f"Annotating {args.type}s in {args.pdf_path}...")

    pages = open_analysis_input_file(args.pdf_path, first_page=args.first_page, last_page=args.last_page)

    # The function pair depends only on args.type, so resolve it once instead
    # of repeating the lookup for every page.
    analysis_fn, draw_fn = get_analysis_and_draw_fn_for_type(args.type)

    for page in pages:
        annotate_page(page, analysis_fn, draw_fn)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line and run the annotation.
    # A Ctrl-C (KeyboardInterrupt) during parsing or while pages are being
    # processed ends the script quietly instead of printing a traceback.
    try:
        cli_args = parse_args()
        main(cli_args)
    except KeyboardInterrupt:
        pass
|