"""Command-line entry point: run a DETR predictor over the pages of a PDF.

Depending on ``--draw_boxes`` the script either hands each page with at least
one predicted box to ``draw_boxes`` or prints the formatted and filtered
predictions as JSON on stdout.
"""
import argparse
import json
from pathlib import Path

from detr.test import draw_boxes
from pdf2image import pdf2image

from fb_detr.predictor import Predictor


def parse_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The ``argparse.Namespace`` with ``resume``, ``output_dir``,
        ``pdf_path`` and ``draw_boxes`` attributes.
    """
    parser = argparse.ArgumentParser()
    # Forwarded to Predictor as its first positional argument
    # (presumably a checkpoint path -- confirm against Predictor).
    parser.add_argument("--resume", required=True)
    # Forwarded to draw_boxes as ``output_path``.
    parser.add_argument("--output_dir", required=True)
    # main() always converts this PDF, so a missing value used to surface as
    # an obscure failure inside pdf2image; reject it at parse time instead.
    parser.add_argument("--pdf_path", required=True)
    parser.add_argument("--draw_boxes", default=False, action="store_true")
    return parser.parse_args(argv)


def build_image_paths(image_root_dir):
    """Return the ``*.png`` files directly inside *image_root_dir* as strings.

    The search is not recursive and the order is whatever ``Path.glob``
    yields.
    """
    return [str(path) for path in Path(image_root_dir).glob("*.png")]


def pdf_to_pages(pdf_path):
    """Convert the PDF at *pdf_path* with ``pdf2image.convert_from_path``.

    Returns whatever ``convert_from_path`` returns (per the pdf2image
    documentation, a list with one image per page).
    """
    return pdf2image.convert_from_path(pdf_path)


def main():
    """Run prediction on every page of the PDF and draw or print the results."""
    # TODO: de-hardcode
    classes = {
        1: "logo",
        2: "other",
        3: "formula",
        4: "signature",
        5: "handwriting_other",
    }

    args = parse_args()

    predictor = Predictor(args.resume, classes=classes, rejection_class="other")

    images = pdf_to_pages(args.pdf_path)
    # NOTE(review): 0.5 looks like a score threshold -- confirm against
    # Predictor.predict before exposing it as an option.
    outputs = predictor.predict(images, 0.5)

    if args.draw_boxes:
        for im, o in zip(images, outputs):
            # Pages without any box are skipped. ``len`` is kept on purpose:
            # the container type of "bboxes" is not visible here and plain
            # truthiness is ambiguous for array types.
            if len(o["bboxes"]):
                draw_boxes(image=im, **o, output_path=args.output_dir)
    else:
        outputs = predictor.format_predictions(outputs)
        outputs = predictor.filter_predictions(outputs)
        for o in outputs:
            print(json.dumps(o, indent=2))


if __name__ == "__main__":
    main()