51 lines
1.8 KiB
Python
51 lines
1.8 KiB
Python
# Sample usage (page index is zero-based; --save_path is optional):
#   python3 scripts/deskew_demo.py /path/to/crooked.pdf 0 [--save_path /path/to/out.pdf]
|
|
import argparse
|
|
import numpy as np
|
|
import pdf2image
|
|
from PIL import Image
|
|
|
|
from cv_analysis.utils.deskew import deskew_histbased # , deskew_linebased
|
|
from cv_analysis.utils.display import show_mpl
|
|
from cv_analysis.utils.draw import draw_stats
|
|
from cv_analysis.table_parsing import parse_table
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line arguments of the deskew demo.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. The default (``None``) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point
            relies on.

    Returns:
        argparse.Namespace with the attributes ``pdf_path`` (str),
        ``page_index`` (int, zero-based: the script passes
        ``page_index + 1`` as the 1-based page number) and ``save_path``
        (str, or ``None`` when the option is not given).

    Raises:
        SystemExit: raised by argparse (after printing a usage message) on
            malformed arguments or a negative ``page_index``.
    """
    parser = argparse.ArgumentParser(
        description="Deskew one page of a PDF and show it before and after correction."
    )
    parser.add_argument("pdf_path", help="path to the input PDF")
    parser.add_argument(
        "page_index",
        type=int,
        help="zero-based index of the page to process",
    )
    parser.add_argument(
        "--save_path",
        help="if given, the displayed pages are also saved to files whose names are derived from this path",
    )

    args = parser.parse_args(argv)

    # A negative index cannot address a page; reject it here with a usage
    # error instead of failing later when the page is loaded.
    if args.page_index < 0:
        parser.error("page_index must be >= 0")

    return args
|
|
|
|
|
|
if __name__ == "__main__":
|
|
args = parse_args()
|
|
page = pdf2image.convert_from_path(args.pdf_path, first_page=args.page_index + 1, last_page=args.page_index + 1)[0]
|
|
page = np.array(page)
|
|
|
|
show_mpl(page)
|
|
# page_ = deskew_linebased(page, verbose=True)
|
|
# show_mpl(page_)
|
|
page_corr, _ = deskew_histbased(page, verbose=True)
|
|
show_mpl(page_corr)
|
|
if args.save_path:
|
|
page_ = Image.fromarray(page).convert("RGB")
|
|
page_.save(args.save_path.replace(".pdf", "_uncorrected.pdf"))
|
|
page_corr_ = Image.fromarray(page_corr).convert("RGB")
|
|
page_corr_.save(args.save_path.replace(".pdf", "_corrected.pdf"))
|
|
# annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index)
|
|
stats = parse_table(page)
|
|
page = draw_stats(page, stats)
|
|
show_mpl(page)
|
|
stats_corr = parse_table(page_corr)
|
|
page_corr = draw_stats(page_corr, stats_corr)
|
|
show_mpl(page_corr)
|
|
if args.save_path:
|
|
page = Image.fromarray(page).convert("RGB")
|
|
page.save(args.save_path.replace(".pdf", "_uncorrected_annotated.pdf"))
|
|
page_corr = Image.fromarray(page_corr).convert("RGB")
|
|
page_corr.save(args.save_path.replace(".pdf", "_corrected_annotated.pdf"))
|