Kernel size for morphology ex set a bit higher, so fewer non-table structures are detected. Reduced the kernel size of the directional motion blur and increased the threshold a little bit so narrow cells won't be split up.
Detected a problem with the cell filtering for certain scanned PDFs.
This commit is contained in:
parent
d70781f4aa
commit
723c6606e1
@ -25,7 +25,7 @@ def add_external_contours(image, img):
|
||||
|
||||
|
||||
def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
|
||||
line_min_width = 47
|
||||
line_min_width = 48
|
||||
kernel_h = np.ones((1, line_min_width), np.uint8)
|
||||
kernel_v = np.ones((line_min_width, 1), np.uint8)
|
||||
|
||||
@ -39,15 +39,17 @@ def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
|
||||
img_bin_v = cv2.dilate(img_bin_v, kernel_v, iterations=2)
|
||||
show_mpl(img_bin_h | img_bin_v)
|
||||
|
||||
img_bin_h = apply_motion_blur(img_bin_h, 100, 0)
|
||||
img_bin_v = apply_motion_blur(img_bin_v, 100, 90)
|
||||
# reduced filter size from 100 to 80 to minimize splitting of narrow cells
|
||||
img_bin_h = apply_motion_blur(img_bin_h, 80, 0)
|
||||
img_bin_v = apply_motion_blur(img_bin_v, 80, 90)
|
||||
|
||||
img_bin_final = img_bin_h | img_bin_v
|
||||
show_mpl(img_bin_final)
|
||||
|
||||
th1, img_bin_final = cv2.threshold(img_bin_final, 110, 255, cv2.THRESH_BINARY)
|
||||
# changed threshold from 110 to 120 to minimize cell splitting
|
||||
th1, img_bin_final = cv2.threshold(img_bin_final, 120, 255, cv2.THRESH_BINARY)
|
||||
img_bin_final = cv2.dilate(img_bin_final, np.ones((1, 1), np.uint8), iterations=1)
|
||||
show_mpl(img_bin_final)
|
||||
# problematic if the layout parser detects too large a layout box, as in VV-748542.pdf p.22
|
||||
img_bin_final = disconnect_non_existing_cells(img_bin_final, bounding_rects)
|
||||
show_mpl(img_bin_final)
|
||||
|
||||
@ -108,7 +110,7 @@ def find_table_layout_boxes(image: np.array):
|
||||
table_boxes = []
|
||||
for box in layout_boxes:
|
||||
(x, y, w, h) = box
|
||||
if w * h >= 300000:
|
||||
if w * h >= 100000:
|
||||
table_boxes.append(box)
|
||||
return table_boxes
|
||||
|
||||
|
||||
@ -50,17 +50,10 @@ def adjacent(a, b):
|
||||
|
||||
v1 = a.xmin, a.ymin
|
||||
v2 = a.xmax, a.ymax
|
||||
#print("topleft and bottom right rec1", v1,v2)
|
||||
|
||||
w1 = b.xmin, b.ymin
|
||||
w2 = b.xmax, b.ymax
|
||||
#print("topleft and bottom right rec2", w1, w2)
|
||||
# some rectangles are compared twice
|
||||
# print((
|
||||
# adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
# adjacent2d(v1[0], w2[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
# adjacent2d(v2[1], w1[1], w1[0], w2[0], v1[0], v2[0]),
|
||||
# adjacent2d(v1[1], w2[1], w1[0], w2[0], v1[0], v2[0]),
|
||||
# ))
|
||||
|
||||
return any(
|
||||
(
|
||||
adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
|
||||
@ -86,9 +79,7 @@ def __remove_isolated_unsorted(rectangles):
|
||||
def __remove_isolated_sorted(rectangles):
|
||||
def is_connected(left, center, right):
|
||||
return any(starmap(adjacent, [(left, center), (center, right)]))
|
||||
|
||||
rectangles = list(map(xywh_to_vec_rect, rectangles))
|
||||
# print("rectangles after coordinates to vetor rectangles", len(rectangles), "\n", rectangles)
|
||||
|
||||
lefts = [None, *rectangles[:-1]]
|
||||
rights = [*rectangles[1:], None]
|
||||
@ -96,7 +87,6 @@ def __remove_isolated_sorted(rectangles):
|
||||
mask = starmap(is_connected, zip(lefts, rectangles, rights))
|
||||
rectangles = compress(rectangles, mask)
|
||||
rectangles = map(vec_rect_to_xywh, rectangles)
|
||||
|
||||
return rectangles
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user