make full demo
This commit is contained in:
parent
a09fe65ed2
commit
2180ff924a
@ -2,15 +2,19 @@
|
||||
import argparse
|
||||
import numpy as np
|
||||
import pdf2image
|
||||
from PIL import Image
|
||||
|
||||
from vidocp.utils.deskew import deskew_histbased#, deskew_linebased
|
||||
from vidocp.utils.display import show_mpl
|
||||
from vidocp.utils.deskew import deskew_histbased, deskew_linebased
|
||||
from vidocp.utils.draw import draw_stats
|
||||
from vidocp.table_parsing import parse_table
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("pdf_path")
|
||||
parser.add_argument("page_index", type=int)
|
||||
parser.add_argument("--save_path")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@ -23,7 +27,25 @@ if __name__ == "__main__":
|
||||
page = np.array(page)
|
||||
|
||||
show_mpl(page)
|
||||
page_ = deskew_linebased(page, verbose=True)
|
||||
show_mpl(page_)
|
||||
page_ = deskew_histbased(page, verbose=True)
|
||||
show_mpl(page_)
|
||||
#page_ = deskew_linebased(page, verbose=True)
|
||||
#show_mpl(page_)
|
||||
page_corr = deskew_histbased(page, verbose=True)
|
||||
show_mpl(page_corr)
|
||||
if args.save_path:
|
||||
page_ = Image.fromarray(page).convert("RGB")
|
||||
page_.save(args.save_path.replace(".pdf", "_uncorrected.pdf"))
|
||||
page_corr_ = Image.fromarray(page_corr).convert("RGB")
|
||||
page_corr_.save(args.save_path.replace(".pdf", "_corrected.pdf"))
|
||||
#annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index)
|
||||
stats = parse_table(page)
|
||||
page = draw_stats(page, stats)
|
||||
show_mpl(page)
|
||||
stats_corr = parse_table(page_corr)
|
||||
page_corr = draw_stats(page_corr, stats_corr)
|
||||
show_mpl(page_corr)
|
||||
if args.save_path:
|
||||
page = Image.fromarray(page).convert("RGB")
|
||||
page.save(args.save_path.replace(".pdf", "_uncorrected_annotated.pdf"))
|
||||
page_corr = Image.fromarray(page_corr).convert("RGB")
|
||||
page_corr.save(args.save_path.replace(".pdf", "_corrected_annotated.pdf"))
|
||||
|
||||
@ -46,7 +46,7 @@ def parse_table(image: np.array):
|
||||
return stats
|
||||
|
||||
|
||||
def annotate_tables_in_pdf(pdf_path, page_index=1, deskew=True):
|
||||
def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False):
|
||||
|
||||
page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
|
||||
page = np.array(page)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user