Matthias Bisping 77f85e9de1 Refactoring
Various
2023-01-09 17:22:01 +01:00

76 lines
2.2 KiB
Python

import argparse
import loguru
from cv_analysis.figure_detection.figure_detection import detect_figures
from cv_analysis.layout_parsing import parse_layout
from cv_analysis.redaction_detection import find_redactions
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.display import show_image
from cv_analysis.utils.drawing import draw_contours, draw_rectangles
from cv_analysis.utils.input import open_analysis_input_file
def parse_args(argv=None):
    """Parse the command-line options of the annotation script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what a call without arguments
            has always done.

    Returns:
        The ``argparse.Namespace`` produced by the parser, carrying the
        attributes ``pdf_path``, ``first_page``, ``last_page``, ``type``
        and ``page``.
    """
    parser = argparse.ArgumentParser(
        description="Annotate PDF pages with detected elements. Specified pages form a closed interval and are 1-based."
    )
    parser.add_argument("pdf_path")
    parser.add_argument(
        "--first_page",
        "-f",
        type=int,
        default=1,
    )
    # "--last_page" mirrors the spelling of "--first_page". The single-dash
    # "-last_page" form is kept as an alias so existing invocations keep working.
    parser.add_argument(
        "--last_page",
        "-last_page",
        "-l",
        help="if not specified, defaults to the value of the first page specified",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--type",
        "-t",
        help="element type to look for and analyze",
        choices=["table", "redaction", "layout", "figure"],
        default="table",
    )
    # NOTE(review): "--page" is parsed but not read anywhere in the visible
    # part of this script -- confirm whether it is still needed.
    parser.add_argument("--page", "-p", type=int, default=1)
    return parser.parse_args(argv)
def annotate_page(page_image, analysis_fn, draw_fn):
    """Analyse one page image, draw the findings onto it and display it.

    Args:
        page_image: The page image handed to both callables.
        analysis_fn: Callable invoked as ``analysis_fn(page_image)``; its
            return value is passed on to ``draw_fn``.
        draw_fn: Callable invoked as ``draw_fn(page_image, findings)``; its
            return value is what gets displayed.

    Returns:
        None. The annotated image is only passed to ``show_image``.
    """
    findings = analysis_fn(page_image)
    annotated = draw_fn(page_image, findings)
    show_image(annotated)
def get_analysis_and_draw_fn_for_type(element_type):
    """Look up the analysis and drawing callables for an element type.

    Args:
        element_type: One of ``"table"``, ``"redaction"``, ``"layout"`` or
            ``"figure"``.

    Returns:
        A ``(analysis_fn, draw_fn)`` tuple for the requested element type.

    Raises:
        KeyError: If ``element_type`` is not one of the known keys.
    """
    handlers_by_type = {
        "table": (parse_tables, draw_rectangles),
        "redaction": (find_redactions, draw_contours),
        "layout": (parse_layout, draw_rectangles),
        "figure": (detect_figures, draw_rectangles),
    }
    detector, painter = handlers_by_type[element_type]
    return detector, painter
def main(args):
    """Annotate and display every page in the requested page range.

    Args:
        args: Parsed command-line namespace; this function reads
            ``pdf_path``, ``first_page``, ``last_page`` and ``type``.

    Raises:
        KeyError: If ``args.type`` is not a known element type (propagated
            from ``get_analysis_and_draw_fn_for_type``).
    """
    loguru.logger.info(f"Annotating {args.type}s in {args.pdf_path}...")
    pages = open_analysis_input_file(args.pdf_path, first_page=args.first_page, last_page=args.last_page)
    # The element type is fixed for the whole run, so resolve the callables
    # once instead of repeating the lookup for every page.
    analysis_fn, draw_fn = get_analysis_and_draw_fn_for_type(args.type)
    for page in pages:
        annotate_page(page, analysis_fn, draw_fn)
if __name__ == "__main__":
    try:
        cli_args = parse_args()
        main(cli_args)
    except KeyboardInterrupt:
        # Ctrl-C is deliberately swallowed so interrupting the script
        # exits without a traceback.
        pass