diff --git a/table_parsing/table_parsig.py b/table_parsing/table_parsig.py
index c54aacd..ce69434 100644
--- a/table_parsing/table_parsig.py
+++ b/table_parsing/table_parsig.py
@@ -1,3 +1,4 @@
+import json
 from itertools import count
 
 import cv2
@@ -32,7 +33,14 @@ def parse_tables_in_pdf(pages):
 
 def annotate_image(image, stats):
     for x, y, w, h, area in stats[2:]:
-        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 255), 2)
+        if w > 10 and h > 10:
+            cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 255), 2)
+
+            for i, (s, v) in enumerate(zip(["x", "y", "w", "h"], [x, y, w, h])):
+                anno = f"{s} = {v}"
+                xann = int(x + 5)
+                yann = int(y + h - (20 * (i + 1)))
+                cv2.putText(image, anno, (xann, yann), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 255), 2)
 
     return image
 