Pull request #29: Evaluate layout detection

Merge in RR/cv-analysis from evaluate_layout_detection to master

Squashed commit of the following:

commit 8ec2f69fc61d1e15bd502b0a2c1f720cbec2b34e
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Aug 23 15:07:21 2022 +0200

    Repaired is_not_included() logic (it dropped the outer rectangle instead of the included one)

commit 97be081d1e60989313924ceac0bfb3062229411e
Merge: 2c28fa2 2b5c4f1
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Aug 23 14:28:14 2022 +0200

    Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into evaluate_layout_detection

commit 2c28fa280b7eff922c715245fffe69702c7e6742
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Aug 23 13:50:17 2022 +0200

    del print statements

commit c60121fc4faebc5de556ec0ab7a3af4f815f7ce1
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Mon Aug 22 10:51:52 2022 +0200

    few changes to connect_rects.py

commit a99719905d58cbe856fa020177abd7e317c1d072
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Thu Aug 18 08:37:12 2022 +0200

    layout parsing improved with connect_rects.py

commit d693688a0f0d63395cfd36645de7b3417f64de30
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Aug 2 09:31:19 2022 +0200

    removed vizlogger instances
This commit is contained in:
lillian locarnini 2022-08-23 15:09:51 +02:00
parent 2b5c4f1e45
commit 95cab33f19
5 changed files with 172 additions and 32 deletions

View File

@ -1,3 +1,4 @@
import itertools
from itertools import compress
from itertools import starmap
from operator import __and__
@ -5,6 +6,8 @@ from operator import __and__
import cv2
import numpy as np
from cv_analysis.utils.connect_rects import connect_related_rects2
from cv_analysis.utils.structures import Rectangle
from cv_analysis.utils.postprocessing import (
remove_overlapping,
@ -13,14 +16,13 @@ from cv_analysis.utils.postprocessing import (
)
from cv_analysis.utils.visual_logging import vizlogger
# could be a dynamic parameter if the scan is noisy
def is_likely_segment(rect, min_area=100):
    """Tell whether a contour is large enough to be kept as a segment.

    Args:
        rect: Contour handed to ``cv2.contourArea``.
        min_area: Area that the contour has to strictly exceed.

    Returns:
        True when the (non-oriented) contour area is greater than ``min_area``.
    """
    area = cv2.contourArea(rect, False)
    return area > min_area
def find_segments(image):
contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
mask1 = map(is_likely_segment, contours)
mask2 = map(has_no_parent, hierarchies[0])
mask = starmap(__and__, zip(mask1, mask2))
@ -31,24 +33,32 @@ def find_segments(image):
return rectangles
def parse_layout(image: np.array):
def dilate_page_components(image, blur_size=(7, 7), kernel_size=(5, 5), iterations=4):
    """Blur, binarize and dilate a page image so that nearby ink merges into blobs.

    The defaults reproduce the previously hard-coded pipeline (7x7 Gaussian
    blur, inverted Otsu threshold, 5x5 rectangular kernel, 4 dilation passes).

    Args:
        image: Page image handed to ``cv2.GaussianBlur``.
            NOTE(review): Otsu thresholding is applied afterwards, which
            presumably requires an 8-bit single-channel image -- confirm that
            callers convert to grayscale first.
        blur_size: Kernel size of the Gaussian blur.
        kernel_size: Size of the rectangular structuring element used for the
            dilation. If text is detected word by word, make the kernel bigger.
        iterations: Number of dilation passes.

    Returns:
        The dilated, inverted-binary image.
    """
    blurred = cv2.GaussianBlur(image, blur_size, 0)
    # With THRESH_OTSU the threshold argument (0) is ignored and computed
    # from the image; index [1] is the thresholded image itself.
    thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    return cv2.dilate(thresh, kernel, iterations=iterations)
def fill_in_component_area(image, rect):
    """Paint one component as a filled box with a white frame and return the inverted mask.

    Args:
        image: Image the rectangle is drawn onto; no copy is made here before
            drawing.
        rect: ``(x, y, w, h)`` of the component.

    Returns:
        The bitwise inverse of the image after thresholding at 254 / 255.
    """
    x, y, w, h = rect
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # Thickness -1 fills the box in black; the second call traces a 7 px
    # white outline over it.
    cv2.rectangle(image, top_left, bottom_right, (0, 0, 0), -1)
    cv2.rectangle(image, top_left, bottom_right, (255, 255, 255), 7)
    binary = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)[1]
    return ~binary
def parse_layout(image: np.array):
image = image.copy()
image_ = image.copy()
if len(image_.shape) > 2:
image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)
vizlogger.debug(image_, "layout01_start.png")
image_ = cv2.GaussianBlur(image_, (7, 7), 0)
vizlogger.debug(image_, "layout02_blur.png")
thresh = cv2.threshold(image_, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
vizlogger.debug(image_, "layout03_theshold.png")
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
vizlogger.debug(kernel, "layout04_kernel.png")
dilate = cv2.dilate(thresh, kernel, iterations=4)
vizlogger.debug(dilate, "layout05_dilate.png")
dilate = dilate_page_components(image_)
# show_mpl(dilate)
rects = list(find_segments(dilate))
@ -57,21 +67,21 @@ def parse_layout(image: np.array):
x, y, w, h = rect
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
vizlogger.debug(image, "layout06_rectangles.png")
# show_mpl(image)
_, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
vizlogger.debug(image, "layout07_threshold.png")
image = ~image
vizlogger.debug(image, "layout08_inverse.png")
# show_mpl(image)
if len(image.shape) > 2:
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
vizlogger.debug(image, "layout09_convertcolor.png")
rects = find_segments(image)
# <- End of meta detection
rects = list(map(Rectangle.from_xywh, rects))
rects = remove_included(rects)
rects = remove_overlapping(rects)
return list(map(Rectangle.from_xywh, rects))
rects = map(lambda r: r.xywh(), rects)
rects = connect_related_rects2(rects)
rects = list(map(Rectangle.from_xywh, rects))
rects = remove_included(rects)
return rects

View File

@ -0,0 +1,120 @@
from itertools import combinations, starmap, product
from typing import Iterable
def is_near_enough(rect_pair, max_gap=14):
    """Check whether two ``(x, y, w, h)`` rectangles have facing edges close together.

    Each axis is inspected on its own: a small horizontal gap OR a small
    vertical gap is sufficient, regardless of the distance on the other axis.

    Args:
        rect_pair: Pair of ``(x, y, w, h)`` rectangles.
        max_gap: Largest absolute edge-to-edge distance that still counts as near.

    Returns:
        True if any of the four facing-edge distances is within ``max_gap``.
    """
    ax, ay, aw, ah = rect_pair[0]
    bx, by, bw, bh = rect_pair[1]
    edge_distances = (
        ax - (bx + bw),  # left edge of a vs. right edge of b
        bx - (ax + aw),  # left edge of b vs. right edge of a
        by - (ay + ah),  # top edge of b vs. bottom edge of a
        ay - (by + bh),  # top edge of a vs. bottom edge of b
    )
    return any(abs(distance) <= max_gap for distance in edge_distances)
def is_overlapping(rect_pair):
    """Check whether two ``(x, y, w, h)`` rectangles intersect.

    Rectangles that merely touch (zero-width or zero-height intersection)
    count as overlapping.

    Args:
        rect_pair: Pair of ``(x, y, w, h)`` rectangles.

    Returns:
        True if the intersection extent is non-negative on both axes.
    """
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    # Extent of the intersection on each axis; negative means a gap.
    dx = min(x1 + w1, x2 + w2) - max(x1, x2)
    dy = min(y1 + h1, y2 + h2) - max(y1, y2)
    return dx >= 0 and dy >= 0
def is_on_same_line(rect_pair, tolerance=10):
    """Check whether two ``(x, y, w, h)`` rectangles sit on the same horizontal line.

    Args:
        rect_pair: Pair of ``(x, y, w, h)`` rectangles.
        tolerance: Largest absolute difference between the two top edges or
            the two bottom edges that still counts as aligned.

    Returns:
        True if the top edges or the bottom edges are aligned within
        ``tolerance``, or if the vertical span of one rectangle lies
        completely inside the vertical span of the other.
    """
    _, y1, _, h1 = rect_pair[0]
    _, y2, _, h2 = rect_pair[1]
    bottom1 = y1 + h1
    bottom2 = y2 + h2
    edges_aligned = abs(y1 - y2) <= tolerance or abs(bottom1 - bottom2) <= tolerance
    span_nested = (y2 <= y1 and bottom1 <= bottom2) or (y1 <= y2 and bottom2 <= bottom1)
    return edges_aligned or span_nested
def has_correct_position1(rect_pair, tolerance=10):
    """Check whether two ``(x, y, w, h)`` rectangles are aligned or nested on some axis.

    Args:
        rect_pair: Pair of ``(x, y, w, h)`` rectangles.
        tolerance: Largest absolute difference between corresponding edges
            (left, top, right or bottom) that still counts as aligned.

    Returns:
        True if any pair of corresponding edges is aligned within
        ``tolerance``, or if the span of one rectangle lies completely inside
        the span of the other on the vertical or the horizontal axis.
    """
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    right1, bottom1 = x1 + w1, y1 + h1
    right2, bottom2 = x2 + w2, y2 + h2

    edges_aligned = (
        abs(x1 - x2) <= tolerance
        or abs(y1 - y2) <= tolerance
        or abs(right1 - right2) <= tolerance
        or abs(bottom1 - bottom2) <= tolerance
    )
    span_nested = (
        (y2 <= y1 and bottom1 <= bottom2)
        or (y1 <= y2 and bottom2 <= bottom1)
        or (x2 <= x1 and right1 <= right2)
        or (x1 <= x2 and right2 <= right1)
    )
    return edges_aligned or span_nested
def is_related(rect_pair):
    """Decide whether two rectangles belong together.

    A pair is related when the rectangles overlap, or when they are both
    near enough to each other and positioned compatibly.

    Args:
        rect_pair: Pair of ``(x, y, w, h)`` rectangles.

    Returns:
        True if the pair should be treated as related.
    """
    if is_overlapping(rect_pair):
        return True
    return is_near_enough(rect_pair) and has_correct_position1(rect_pair)
def fuse_rects(rect1, rect2):
    """Return the bounding box that encloses two ``(x, y, w, h)`` rectangles.

    Args:
        rect1: First rectangle as ``(x, y, w, h)``.
        rect2: Second rectangle as ``(x, y, w, h)``.

    Returns:
        ``rect1`` itself when both rectangles compare equal; otherwise a new
        ``(x, y, w, h)`` tuple spanning both rectangles.
    """
    if rect1 == rect2:
        return rect1

    x1, y1, w1, h1 = rect1
    x2, y2, w2, h2 = rect2
    left = min(x1, x2)
    top = min(y1, y2)
    right = max(x1 + w1, x2 + w2)
    bottom = max(y1 + h1, y2 + h2)
    return (left, top, right - left, bottom - top)
def rects_not_the_same(r):
    """Return True when the two rectangles of pair ``r`` differ.

    Args:
        r: Indexable pair whose first two items are the rectangles to compare.
    """
    first = r[0]
    second = r[1]
    return first != second
def find_related_rects(rects):
    """Split rectangles into related pairs and rectangles without any relation.

    Args:
        rects: Collection of hashable ``(x, y, w, h)`` rectangles. It is
            iterated more than once, so it must not be a one-shot iterator.

    Returns:
        A tuple ``(rect_pairs, unrel_rects)``. ``rect_pairs`` lists every
        2-combination of distinct rectangles for which ``is_related`` holds;
        ``unrel_rects`` lists the rectangles that occur in none of those
        pairs. When no pair is related, ``([], rects)`` is returned with the
        input object passed through unchanged.
    """
    rect_pairs = [
        pair
        for pair in combinations(rects, 2)
        if is_related(pair) and rects_not_the_same(pair)
    ]
    if not rect_pairs:
        return [], rects

    # A set gives constant-time membership checks for the filter below.
    related = {rect for pair in rect_pairs for rect in pair}
    unrel_rects = [rect for rect in rects if rect not in related]
    return rect_pairs, unrel_rects
def connect_related_rects(rects):
    """Fuse related rectangles pass by pass until no related pairs remain.

    In every pass each related pair is fused into its bounding box, the fused
    boxes are de-duplicated, and the relation search is repeated on them.
    Boxes that are no longer related to anything are moved to the result.

    NOTE(review): the layout parser imports ``connect_related_rects2``
    instead; this variant looks superseded -- confirm before relying on it.

    Args:
        rects: Collection of hashable ``(x, y, w, h)`` rectangles.

    Returns:
        The unrelated input rectangles followed by the fused rectangles.
    """
    pending_pairs, result = find_related_rects(rects)
    while pending_pairs:
        # dict.fromkeys drops duplicate boxes while keeping first-seen order.
        fused = list(dict.fromkeys(fuse_rects(a, b) for a, b in pending_pairs))
        if len(fused) == 1:
            # A single box has nothing left to pair with; emit it directly.
            result += fused
            fused = []
        pending_pairs, settled = find_related_rects(fused)
        result += settled
        if len(pending_pairs) > 1 and len(set(pending_pairs)) == 1:
            result.append(fused[0])
            pending_pairs = []
    return result
def connect_related_rects2(rects: Iterable[tuple]):
    """Greedily merge related rectangles until no related pair is left.

    The rectangle at the cursor is taken out and compared with all remaining
    ones. On the first related match both are replaced by their fused
    bounding box and the scan restarts from the front; otherwise the
    rectangle is put back at the front and the cursor advances.

    Args:
        rects: Iterable of ``(x, y, w, h)`` rectangles; it is copied into a
            list, so the caller's collection is not modified.

    Returns:
        List of rectangles after merging.
    """
    rects = list(rects)
    current_idx = 0
    # The condition also covers lists with fewer than two entries.
    while current_idx + 1 < len(rects):
        current_rect = rects.pop(current_idx)
        partner_idx = next(
            (
                idx
                for idx, candidate in enumerate(rects)
                if is_related((current_rect, candidate))
            ),
            None,
        )
        if partner_idx is None:
            rects.insert(0, current_rect)
            current_idx += 1
        else:
            rects.insert(0, fuse_rects(current_rect, rects.pop(partner_idx)))
            # A merge may have created new relations, so start over.
            current_idx = 0
    return rects

View File

@ -17,8 +17,8 @@ def remove_overlapping(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
def remove_included(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
rectangles = list(filter(partial(Rectangle.is_not_included, rectangles=rectangles), rectangles))
return rectangles
keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
return keep
def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> list[Rectangle]:

View File

@ -67,17 +67,17 @@ class Rectangle:
union = self.area() + rect.area() - intersection
return intersection / union
def includes(self, rect: "Rectangle", tol=3):
def includes(self, other: "Rectangle", tol=3):
"""does a include b?"""
return (
rect.x1 + tol >= self.x1
and rect.y1 + tol >= self.y1
and rect.x2 - tol <= self.x2
and rect.y2 - tol <= self.y2
other.x1 + tol >= self.x1
and other.y1 + tol >= self.y1
and other.x2 - tol <= self.x2
and other.y2 - tol <= self.y2
)
def is_not_included(self, rectangles: Iterable["Rectangle"]):
return not any(self.includes(rect) for rect in rectangles if not rect == self)
def is_included(self, rectangles: Iterable["Rectangle"]):
    """Tell whether any other rectangle in ``rectangles`` includes this one.

    Rectangles that compare equal to ``self`` are skipped.

    Args:
        rectangles: Candidates that might enclose this rectangle.

    Returns:
        True as soon as one candidate's ``includes(self)`` is truthy,
        otherwise False.
    """
    for rect in rectangles:
        if rect == self:
            continue
        if rect.includes(self):
            return True
    return False
def adjacent(self, rect2: "Rectangle", tolerance=7):
# tolerance=1 was set too low; most lines are 2px wide

View File

@ -1,4 +1,6 @@
import argparse
import timeit
from time import process_time
from itertools import starmap
from pathlib import Path
@ -46,8 +48,16 @@ def get_analysis_fn(analysis_type):
if __name__ == "__main__":
args = parse_args()
t0 = timeit.default_timer()
with open(args.pdf_path, "rb") as f:
pdf_bytes = f.read()
images = convert_pages_to_images(pdf_bytes)
images = convert_pages_to_images(pdf_bytes)
t1 = timeit.default_timer()
annotated_pages = analyse_and_annotate(images=images, analysis_fn=get_analysis_fn(args.type))
t2 = timeit.default_timer()
save_as_pdf(annotated_pages, args.output_folder, Path(args.pdf_path).stem, args.type)
t3 = timeit.default_timer()
print("[s] opening file and convert pdf pages to images: ", t1-t0)
print("[s] analyse and annotate images: ", t2-t1)
print("[s] save images as pdf: ", t3-t2)