From 8c88fc594d7127cc78a68fba8108c812fb9ab8f2 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Sat, 5 Feb 2022 14:42:00 +0100 Subject: [PATCH] renaming; readme --- README.md | 26 ++++++++++++++++++++++++++ scripts/annotate.py | 8 ++++---- 2 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..38c9bca --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# Table Parsing + +This repository implements computer vision based approaches for detecting and parsing visual features such as tables or previous redactions. + +## Installation + +```bash +git clone ssh://git@git.iqser.com:2222/rr/table_parsing.git +cd table_parsing + +python3 -m venv env +source env/bin/activate + +pip install -e . +pip install -r requirements.txt +``` + +## Usage + +```bash +# Parse tables on second page of a PDF +python scripts/annotate.py path/to/document.pdf 1 --type table + +# Detect redactions (black filled rectangles) on first page of a PDF +python scripts/annotate.py path/to/document.pdf 0 --type redaction +``` diff --git a/scripts/annotate.py b/scripts/annotate.py index 68e4dd2..1f14019 100644 --- a/scripts/annotate.py +++ b/scripts/annotate.py @@ -9,7 +9,7 @@ def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("pdf_path") parser.add_argument("page_index", type=int) - parser.add_argument("--object", choices=["table", "box", "layout"], default="table") + parser.add_argument("--type", choices=["table", "redaction", "layout"], default="table") args = parser.parse_args() @@ -18,10 +18,10 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - if args.object == "table": + if args.type == "table": annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index) - elif args.object == "box": + elif args.type == "redaction": annotate_boxes_in_pdf(args.pdf_path, page_index=args.page_index) - elif args.object == "layout": + elif args.type == "layout": annotate_layout_in_pdf(args.pdf_path, 
page_index=args.page_index)