From 2180ff924a5335e6bc4eb40daacd04e050664e60 Mon Sep 17 00:00:00 2001 From: Isaac Riley Date: Wed, 23 Feb 2022 13:41:57 +0100 Subject: [PATCH] make full demo --- scripts/deskew_demo.py | 32 +++++++++++++++++++++++++++----- vidocp/table_parsing.py | 2 +- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/scripts/deskew_demo.py b/scripts/deskew_demo.py index cfb0eb4..dc17e53 100644 --- a/scripts/deskew_demo.py +++ b/scripts/deskew_demo.py @@ -2,15 +2,19 @@ import argparse import numpy as np import pdf2image +from PIL import Image +from vidocp.utils.deskew import deskew_histbased#, deskew_linebased from vidocp.utils.display import show_mpl -from vidocp.utils.deskew import deskew_histbased, deskew_linebased +from vidocp.utils.draw import draw_stats +from vidocp.table_parsing import parse_table def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("pdf_path") parser.add_argument("page_index", type=int) + parser.add_argument("--save_path") args = parser.parse_args() @@ -23,7 +27,25 @@ if __name__ == "__main__": page = np.array(page) show_mpl(page) - page_ = deskew_linebased(page, verbose=True) - show_mpl(page_) - page_ = deskew_histbased(page, verbose=True) - show_mpl(page_) + #page_ = deskew_linebased(page, verbose=True) + #show_mpl(page_) + page_corr = deskew_histbased(page, verbose=True) + show_mpl(page_corr) + if args.save_path: + page_ = Image.fromarray(page).convert("RGB") + page_.save(args.save_path.replace(".pdf", "_uncorrected.pdf")) + page_corr_ = Image.fromarray(page_corr).convert("RGB") + page_corr_.save(args.save_path.replace(".pdf", "_corrected.pdf")) + #annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index) + stats = parse_table(page) + page = draw_stats(page, stats) + show_mpl(page) + stats_corr = parse_table(page_corr) + page_corr = draw_stats(page_corr, stats_corr) + show_mpl(page_corr) + if args.save_path: + page = Image.fromarray(page).convert("RGB") + page.save(args.save_path.replace(".pdf", "_uncorrected_annotated.pdf")) + page_corr = Image.fromarray(page_corr).convert("RGB") + page_corr.save(args.save_path.replace(".pdf", "_corrected_annotated.pdf")) + \ No newline at end of file diff --git a/vidocp/table_parsing.py b/vidocp/table_parsing.py index c188cf6..be77a9c 100644 --- a/vidocp/table_parsing.py +++ b/vidocp/table_parsing.py @@ -46,7 +46,7 @@ def parse_table(image: np.array): return stats -def annotate_tables_in_pdf(pdf_path, page_index=1, deskew=True): +def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False): page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] page = np.array(page)