Some changes to fix minor bugs in table_parsing.py and post_processing.py
This commit is contained in:
parent
4964c8f5a1
commit
07907d45dd
@ -76,12 +76,12 @@ def has_table_shape(rects):
|
||||
def parse_table(image: np.array):
|
||||
def is_large_enough(stat):
|
||||
x1, y1, w, h, area = stat
|
||||
# was set too high (3000): Boxes in a table can definitely be smaller. Example: a column titled "No."; this cell has an area of approximately 500 px based on 11pt letters
|
||||
# with extra condition for the length of height and width weirdly narrow rectangles can be filtered
|
||||
# was set too high (3000): Boxes in a table can be smaller. Example: a column titled "No."; this cell has an area of approximately 500 px based on 11pt letters
|
||||
# with the extra minimum-width and minimum-height conditions, weirdly narrow rectangles can be filtered out
|
||||
return area > 500 and w > 35 and h > 15
|
||||
|
||||
gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
#changed threshold value from 150 to 200 b
|
||||
# changed threshold value from 150 to 200 because of a shaded edge-case table (NOTE: the call below actually uses 195)
|
||||
th1, img_bin = cv2.threshold(gray_scale, 195, 255, cv2.THRESH_BINARY)
|
||||
img_bin = ~img_bin
|
||||
|
||||
@ -91,12 +91,10 @@ def parse_table(image: np.array):
|
||||
_, _, stats, _ = cv2.connectedComponentsWithStats(~img_bin_final, connectivity=8, ltype=cv2.CV_32S)
|
||||
|
||||
stats = np.vstack(list(filter(is_large_enough, stats)))
|
||||
print(stats)
|
||||
rects = stats[:, :-1][2:]
|
||||
|
||||
# FIXME: produces false negatives for `data0/043d551b4c4c768b899eaece4466c836.pdf 1 --type table`
|
||||
rects = list(remove_isolated(rects, input_sorted=True))
|
||||
print(rects)
|
||||
|
||||
# print(f"{has_table_shape(rects) = }")
|
||||
# if not has_table_shape(rects):
|
||||
|
||||
@ -43,7 +43,6 @@ def adjacent(a, b):
|
||||
"""
|
||||
|
||||
def adjacent2d(g, h, i, j, k, l):
|
||||
#print(abs(g-h), [k <= p <= l for p in [i, j]])
|
||||
return adjacent1d(g, h) and any(k <= p <= l for p in [i, j])
|
||||
|
||||
if any(x is None for x in (a, b)):
|
||||
@ -51,17 +50,17 @@ def adjacent(a, b):
|
||||
|
||||
v1 = a.xmin, a.ymin
|
||||
v2 = a.xmax, a.ymax
|
||||
print("topleft and bottom right rec1", v1,v2)
|
||||
#print("topleft and bottom right rec1", v1,v2)
|
||||
w1 = b.xmin, b.ymin
|
||||
w2 = b.xmax, b.ymax
|
||||
print("topleft and bottom right rec2", w1, w2)
|
||||
#print("topleft and bottom right rec2", w1, w2)
|
||||
# some rectangles are compared twice
|
||||
print((
|
||||
adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
adjacent2d(v1[0], w2[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
adjacent2d(v2[1], w1[1], w1[0], w2[0], v1[0], v2[0]),
|
||||
adjacent2d(v1[1], w2[1], w1[0], w2[0], v1[0], v2[0]),
|
||||
))
|
||||
# print((
|
||||
# adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
# adjacent2d(v1[0], w2[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
# adjacent2d(v2[1], w1[1], w1[0], w2[0], v1[0], v2[0]),
|
||||
# adjacent2d(v1[1], w2[1], w1[0], w2[0], v1[0], v2[0]),
|
||||
# ))
|
||||
return any(
|
||||
(
|
||||
adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user