From 95cab33f190871d6e4ba4be2dc3c71fdb57f0f5d Mon Sep 17 00:00:00 2001 From: lillian locarnini Date: Tue, 23 Aug 2022 15:09:51 +0200 Subject: [PATCH] Pull request #29: Evaluate layout detection Merge in RR/cv-analysis from evaluate_layout_detection to master Squashed commit of the following: commit 8ec2f69fc61d1e15bd502b0a2c1f720cbec2b34e Author: llocarnini Date: Tue Aug 23 15:07:21 2022 +0200 repaired is_not_included() logic (did drop the outer rectangle, not the included) commit 97be081d1e60989313924ceac0bfb3062229411e Merge: 2c28fa2 2b5c4f1 Author: llocarnini Date: Tue Aug 23 14:28:14 2022 +0200 Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into evaluate_layout_detection commit 2c28fa280b7eff922c715245fffe69702c7e6742 Author: llocarnini Date: Tue Aug 23 13:50:17 2022 +0200 del print statements commit c60121fc4faebc5de556ec0ab7a3af4f815f7ce1 Author: llocarnini Date: Mon Aug 22 10:51:52 2022 +0200 few changes to connect_rects.py commit a99719905d58cbe856fa020177abd7e317c1d072 Author: llocarnini Date: Thu Aug 18 08:37:12 2022 +0200 layout parsing improved with connect_rects.py commit d693688a0f0d63395cfd36645de7b3417f64de30 Author: llocarnini Date: Tue Aug 2 09:31:19 2022 +0200 removed vizlogger instances --- cv_analysis/layout_parsing.py | 54 ++++++++----- cv_analysis/utils/connect_rects.py | 120 ++++++++++++++++++++++++++++ cv_analysis/utils/postprocessing.py | 4 +- cv_analysis/utils/structures.py | 14 ++-- scripts/annotate_pdf.py | 12 ++- 5 files changed, 172 insertions(+), 32 deletions(-) create mode 100644 cv_analysis/utils/connect_rects.py diff --git a/cv_analysis/layout_parsing.py b/cv_analysis/layout_parsing.py index d83e8a5..f5de783 100644 --- a/cv_analysis/layout_parsing.py +++ b/cv_analysis/layout_parsing.py @@ -1,3 +1,4 @@ +import itertools from itertools import compress from itertools import starmap from operator import __and__ @@ -5,6 +6,8 @@ from operator import __and__ import cv2 import numpy as np + +from 
cv_analysis.utils.connect_rects import connect_related_rects2 from cv_analysis.utils.structures import Rectangle from cv_analysis.utils.postprocessing import ( remove_overlapping, @@ -13,14 +16,13 @@ from cv_analysis.utils.postprocessing import ( ) from cv_analysis.utils.visual_logging import vizlogger - +#could be dynamic parameter if the scan is noisy def is_likely_segment(rect, min_area=100): return cv2.contourArea(rect, False) > min_area def find_segments(image): contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - mask1 = map(is_likely_segment, contours) mask2 = map(has_no_parent, hierarchies[0]) mask = starmap(__and__, zip(mask1, mask2)) @@ -31,24 +33,32 @@ def find_segments(image): return rectangles -def parse_layout(image: np.array): +def dilate_page_components(image): + #if text is detected in words make kernel bigger + image = cv2.GaussianBlur(image, (7, 7), 0) + thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) + return cv2.dilate(thresh, kernel, iterations=4) + +def fill_in_component_area(image, rect): + x, y, w, h = rect + cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1) + cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7) + _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY) + return ~image + + + +def parse_layout(image: np.array): image = image.copy() image_ = image.copy() if len(image_.shape) > 2: image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY) - vizlogger.debug(image_, "layout01_start.png") - image_ = cv2.GaussianBlur(image_, (7, 7), 0) - vizlogger.debug(image_, "layout02_blur.png") - thresh = cv2.threshold(image_, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] - vizlogger.debug(image_, "layout03_theshold.png") - - kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) - vizlogger.debug(kernel, "layout04_kernel.png") - dilate = cv2.dilate(thresh, kernel, iterations=4) - 
vizlogger.debug(dilate, "layout05_dilate.png") + dilate = dilate_page_components(image_) + # show_mpl(dilate) rects = list(find_segments(dilate)) @@ -57,21 +67,21 @@ def parse_layout(image: np.array): x, y, w, h = rect cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1) cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7) - vizlogger.debug(image, "layout06_rectangles.png") - + # show_mpl(image) _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY) - vizlogger.debug(image, "layout07_threshold.png") image = ~image - vizlogger.debug(image, "layout08_inverse.png") - + # show_mpl(image) if len(image.shape) > 2: image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - vizlogger.debug(image, "layout09_convertcolor.png") rects = find_segments(image) # <- End of meta detection - + rects = list(map(Rectangle.from_xywh, rects)) rects = remove_included(rects) - rects = remove_overlapping(rects) - return list(map(Rectangle.from_xywh, rects)) + rects = map(lambda r: r.xywh(), rects) + rects = connect_related_rects2(rects) + rects = list(map(Rectangle.from_xywh, rects)) + rects = remove_included(rects) + + return rects diff --git a/cv_analysis/utils/connect_rects.py b/cv_analysis/utils/connect_rects.py new file mode 100644 index 0000000..09d48bb --- /dev/null +++ b/cv_analysis/utils/connect_rects.py @@ -0,0 +1,120 @@ +from itertools import combinations, starmap, product +from typing import Iterable + + +def is_near_enough(rect_pair, max_gap=14): + x1, y1, w1, h1 = rect_pair[0] + x2, y2, w2, h2 = rect_pair[1] + + return any([abs(x1 - (x2 + w2)) <= max_gap, + abs(x2 - (x1 + w1)) <= max_gap, + abs(y2 - (y1 + h1)) <= max_gap, + abs(y1 - (y2 + h2)) <= max_gap]) + + +def is_overlapping(rect_pair): + x1, y1, w1, h1 = rect_pair[0] + x2, y2, w2, h2 = rect_pair[1] + dx = min(x1 + w1, x2 + w2) - max(x1, x2) + dy = min(y1 + h1, y2 + h2) - max(y1, y2) + return True if (dx >= 0) and (dy >= 0) else False + + +def is_on_same_line(rect_pair): + x1, y1, w1, h1 = rect_pair[0] + 
x2, y2, w2, h2 = rect_pair[1] + return any([any([abs(y1 - y2) <= 10, + abs(y1 + h1 - (y2 + h2)) <= 10]), + any([y2 <= y1 and y1 + h1 <= y2 + h2, + y1 <= y2 and y2 + h2 <= y1 + h1])]) + + +def has_correct_position1(rect_pair): + x1, y1, w1, h1 = rect_pair[0] + x2, y2, w2, h2 = rect_pair[1] + return any([any([abs(x1 - x2) <= 10, + abs(y1 - y2) <= 10, + abs(x1 + w1 - (x2 + w2)) <= 10, + abs(y1 + h1 - (y2 + h2)) <= 10]), + any([y2 <= y1 and y1 + h1 <= y2 + h2, + y1 <= y2 and y2 + h2 <= y1 + h1, + x2 <= x1 and x1 + w1 <= x2 + w2, + x1 <= x2 and x2 + w2 <= x1 + w1])]) + + +def is_related(rect_pair): + return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping( + rect_pair) + + +def fuse_rects(rect1, rect2): + if rect1 == rect2: + return rect1 + x1, y1, w1, h1 = rect1 + x2, y2, w2, h2 = rect2 + + topleft = list(min(product([x1, x2], [y1, y2]))) + bottomright = list(max(product([x1 + w1, x2 + w2], [y1 + h1, y2 + h2]))) + + w = [bottomright[0] - topleft[0]] + h = [bottomright[1] - topleft[1]] + return tuple(topleft + w + h) + + +def rects_not_the_same(r): + return r[0] != r[1] + + +def find_related_rects(rects): + rect_pairs = list(filter(is_related, combinations(rects, 2))) + rect_pairs = list(filter(rects_not_the_same, rect_pairs)) + if not rect_pairs: + return [], rects + rel_rects = list(set([rect for pair in rect_pairs for rect in pair])) + unrel_rects = [rect for rect in rects if rect not in rel_rects] + return rect_pairs, unrel_rects + + +def connect_related_rects(rects): + rects_to_connect, rects_new = find_related_rects(rects) + + while len(rects_to_connect) > 0: + rects_fused = list(starmap(fuse_rects, rects_to_connect)) + rects_fused = list(dict.fromkeys(rects_fused)) + + if len(rects_fused) == 1: + rects_new += rects_fused + rects_fused = [] + + rects_to_connect, connected_rects = find_related_rects(rects_fused) + rects_new += connected_rects + + if len(rects_to_connect) > 1 and len(set(rects_to_connect)) == 1: + 
rects_new.append(rects_fused[0]) + rects_to_connect = [] + + return rects_new + + +def connect_related_rects2(rects: Iterable[tuple]): + rects = list(rects) + current_idx = 0 + + while True: + if current_idx + 1 >= len(rects) or len(rects) <= 1: + break + merge_happened = False + current_rect = rects.pop(current_idx) + for idx, maybe_related_rect in enumerate(rects): + if is_related((current_rect, maybe_related_rect)): + current_rect = fuse_rects(current_rect, maybe_related_rect) + rects.pop(idx) + merge_happened = True + break + rects.insert(0, current_rect) + if not merge_happened: + current_idx += 1 + elif merge_happened: + current_idx = 0 + + return rects diff --git a/cv_analysis/utils/postprocessing.py b/cv_analysis/utils/postprocessing.py index d620696..ea4c4ee 100644 --- a/cv_analysis/utils/postprocessing.py +++ b/cv_analysis/utils/postprocessing.py @@ -17,8 +17,8 @@ def remove_overlapping(rectangles: Iterable[Rectangle]) -> list[Rectangle]: def remove_included(rectangles: Iterable[Rectangle]) -> list[Rectangle]: - rectangles = list(filter(partial(Rectangle.is_not_included, rectangles=rectangles), rectangles)) - return rectangles + keep = [rect for rect in rectangles if not rect.is_included(rectangles)] + return keep def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> list[Rectangle]: diff --git a/cv_analysis/utils/structures.py b/cv_analysis/utils/structures.py index ac19a5b..cafbf97 100644 --- a/cv_analysis/utils/structures.py +++ b/cv_analysis/utils/structures.py @@ -67,17 +67,17 @@ class Rectangle: union = self.area() + rect.area() - intersection return intersection / union - def includes(self, rect: "Rectangle", tol=3): + def includes(self, other: "Rectangle", tol=3): """does a include b?""" return ( - rect.x1 + tol >= self.x1 - and rect.y1 + tol >= self.y1 - and rect.x2 - tol <= self.x2 - and rect.y2 - tol <= self.y2 + other.x1 + tol >= self.x1 + and other.y1 + tol >= self.y1 + and other.x2 - tol <= self.x2 + and other.y2 - tol <= 
self.y2 ) - def is_not_included(self, rectangles: Iterable["Rectangle"]): - return not any(self.includes(rect) for rect in rectangles if not rect == self) + def is_included(self, rectangles: Iterable["Rectangle"]): + return any(rect.includes(self) for rect in rectangles if not rect == self) def adjacent(self, rect2: "Rectangle", tolerance=7): # tolerance=1 was set too low; most lines are 2px wide diff --git a/scripts/annotate_pdf.py b/scripts/annotate_pdf.py index 2f0ad83..e65f0a1 100644 --- a/scripts/annotate_pdf.py +++ b/scripts/annotate_pdf.py @@ -1,4 +1,6 @@ import argparse +import timeit +from time import process_time from itertools import starmap from pathlib import Path @@ -46,8 +48,16 @@ def get_analysis_fn(analysis_type): if __name__ == "__main__": args = parse_args() + t0 = timeit.default_timer() with open(args.pdf_path, "rb") as f: pdf_bytes = f.read() - images = convert_pages_to_images(pdf_bytes) + images = convert_pages_to_images(pdf_bytes) + t1 = timeit.default_timer() annotated_pages = analyse_and_annotate(images=images, analysis_fn=get_analysis_fn(args.type)) + t2 = timeit.default_timer() save_as_pdf(annotated_pages, args.output_folder, Path(args.pdf_path).stem, args.type) + t3 = timeit.default_timer() + print("[s] opening file and convert pdf pages to images: ", t1-t0) + print("[s] analyse and annotate images: ", t2-t1) + print("[s] save images as pdf: ", t3-t2) +