make full demo

This commit is contained in:
Isaac Riley 2022-02-23 13:41:57 +01:00
parent a09fe65ed2
commit 2180ff924a
2 changed files with 28 additions and 6 deletions

View File

@ -2,15 +2,19 @@
import argparse
import numpy as np
import pdf2image
from PIL import Image
from vidocp.utils.deskew import deskew_histbased#, deskew_linebased
from vidocp.utils.display import show_mpl
from vidocp.utils.deskew import deskew_histbased, deskew_linebased
from vidocp.utils.draw import draw_stats
from vidocp.table_parsing import parse_table
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("pdf_path")
parser.add_argument("page_index", type=int)
parser.add_argument("--save_path")
args = parser.parse_args()
@ -23,7 +27,25 @@ if __name__ == "__main__":
page = np.array(page)
show_mpl(page)
page_ = deskew_linebased(page, verbose=True)
show_mpl(page_)
page_ = deskew_histbased(page, verbose=True)
show_mpl(page_)
#page_ = deskew_linebased(page, verbose=True)
#show_mpl(page_)
page_corr = deskew_histbased(page, verbose=True)
show_mpl(page_corr)
if args.save_path:
page_ = Image.fromarray(page).convert("RGB")
page_.save(args.save_path.replace(".pdf", "_uncorrected.pdf"))
page_corr_ = Image.fromarray(page_corr).convert("RGB")
page_corr_.save(args.save_path.replace(".pdf", "_corrected.pdf"))
#annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index)
stats = parse_table(page)
page = draw_stats(page, stats)
show_mpl(page)
stats_corr = parse_table(page_corr)
page_corr = draw_stats(page_corr, stats_corr)
show_mpl(page_corr)
if args.save_path:
page = Image.fromarray(page).convert("RGB")
page.save(args.save_path.replace(".pdf", "_uncorrected_annotated.pdf"))
page_corr = Image.fromarray(page_corr).convert("RGB")
page_corr.save(args.save_path.replace(".pdf", "_corrected_annotated.pdf"))

View File

@ -46,7 +46,7 @@ def parse_table(image: np.array):
return stats
def annotate_tables_in_pdf(pdf_path, page_index=1, deskew=True):
def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False):
page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
page = np.array(page)