From 07907d45dd3c0a0fa6383ec4a938846ed8c375b1 Mon Sep 17 00:00:00 2001
From: llocarnini <lillian.locarnini@iqser.com>
Date: Thu, 10 Feb 2022 10:56:03 +0100
Subject: [PATCH] Silence debug prints and tidy comments in table_parsing.py
 and post_processing.py

---
 vidocp/table_parsing.py         |  8 +++-----
 vidocp/utils/post_processing.py | 17 ++++++++---------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/vidocp/table_parsing.py b/vidocp/table_parsing.py
index 0131c3c..adaa210 100644
--- a/vidocp/table_parsing.py
+++ b/vidocp/table_parsing.py
@@ -76,12 +76,12 @@ def has_table_shape(rects):
 def parse_table(image: np.array):
     def is_large_enough(stat):
         x1, y1, w, h, area = stat
-        #  was set too higg (3000): Boxes in a Table can definetly be smaller. example: a column titled "No." This cell has approximatly an area of 500 px based on 11pt letters
-        #  with extra condition for the length of height and width weirdly narrow rectangles can be filtered
+        #  was set too high (3000): boxes in a table can be smaller. Example: a column titled "No." has a cell area of approximately 500 px, based on 11pt letters
+        #  with the extra conditions on width and height, weirdly narrow rectangles can be filtered out
         return area > 500 and w > 35 and h > 15
 
     gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    #changed threshold value from 150 to 200 b
+    # threshold value raised from 150 to 195 (the value passed to cv2.threshold below) because of a shaded edge-case table
     th1, img_bin = cv2.threshold(gray_scale, 195, 255, cv2.THRESH_BINARY)
     img_bin = ~img_bin
 
@@ -91,12 +91,10 @@ def parse_table(image: np.array):
     _, _, stats, _ = cv2.connectedComponentsWithStats(~img_bin_final, connectivity=8, ltype=cv2.CV_32S)
 
     stats = np.vstack(list(filter(is_large_enough, stats)))
-    print(stats)
     rects = stats[:, :-1][2:]
 
     # FIXME: produces false negatives for `data0/043d551b4c4c768b899eaece4466c836.pdf 1 --type table`
     rects = list(remove_isolated(rects, input_sorted=True))
-    print(rects)
 
     # print(f"{has_table_shape(rects) = }")
     # if not has_table_shape(rects):
diff --git a/vidocp/utils/post_processing.py b/vidocp/utils/post_processing.py
index 77f8cab..6cc9452 100644
--- a/vidocp/utils/post_processing.py
+++ b/vidocp/utils/post_processing.py
@@ -43,7 +43,6 @@ def adjacent(a, b):
     """
 
     def adjacent2d(g, h, i, j, k, l):
-        #print(abs(g-h), [k <= p <= l for p in [i, j]])
         return adjacent1d(g, h) and any(k <= p <= l for p in [i, j])
 
     if any(x is None for x in (a, b)):
@@ -51,17 +50,17 @@ def adjacent(a, b):
 
     v1 = a.xmin, a.ymin
     v2 = a.xmax, a.ymax
-    print("topleft and bottom right rec1", v1,v2)
+    #print("topleft and bottom right rec1", v1,v2)
     w1 = b.xmin, b.ymin
     w2 = b.xmax, b.ymax
-    print("topleft and bottom right rec2", w1, w2)
+    #print("topleft and bottom right rec2", w1, w2)
     # some rectangles are compared twice
-    print((
-            adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
-            adjacent2d(v1[0], w2[0], w1[1], w2[1], v1[1], v2[1]),
-            adjacent2d(v2[1], w1[1], w1[0], w2[0], v1[0], v2[0]),
-            adjacent2d(v1[1], w2[1], w1[0], w2[0], v1[0], v2[0]),
-        ))
+    # print((
+    #         adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
+    #         adjacent2d(v1[0], w2[0], w1[1], w2[1], v1[1], v2[1]),
+    #         adjacent2d(v2[1], w1[1], w1[0], w2[0], v1[0], v2[0]),
+    #         adjacent2d(v1[1], w2[1], w1[0], w2[0], v1[0], v2[0]),
+    #     ))
     return any(
         (
             adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),