Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into clean_cv

black
2022-10-06 16:33:36 +02:00 · 2022-10-04 13:40:42 +02:00 · 2022-09-30 09:59:31 +02:00
8 changed files with 46 additions and 41 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,4 @@ build_venv/
 /data/metadata_testing_files.csv
 .coverage
 /data/
+/venv/
--- a/cv_analysis/layout_parsing.py
+++ b/cv_analysis/layout_parsing.py
@@ -1,4 +1,3 @@
-import itertools
 from itertools import compress
 from itertools import starmap
 from operator import __and__
@@ -6,17 +5,15 @@ from operator import __and__
 import cv2
 import numpy as np

-
 from cv_analysis.utils.connect_rects import connect_related_rects2
-from cv_analysis.utils.structures import Rectangle
 from cv_analysis.utils.postprocessing import (
-    remove_overlapping,
    remove_included,
    has_no_parent,
 )
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.structures import Rectangle

-#could be dynamic parameter is the scan is noisy
+
+# could be dynamic parameter is the scan is noisy
 def is_likely_segment(rect, min_area=100):
    return cv2.contourArea(rect, False) > min_area

@@ -34,7 +31,7 @@ def find_segments(image):


 def dilate_page_components(image):
-    #if text is detected in words make kernel bigger
+    # if text is detected in words make kernel bigger
    image = cv2.GaussianBlur(image, (7, 7), 0)
    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
@@ -49,7 +46,6 @@ def fill_in_component_area(image, rect):
    return ~image


-
 def parse_layout(image: np.array):
    image = image.copy()
    image_ = image.copy()
--- a/cv_analysis/redaction_detection.py
+++ b/cv_analysis/redaction_detection.py
@@ -2,7 +2,8 @@ from functools import partial

 import cv2
 import numpy as np
-from iteration_utilities import starfilter, first
+from iteration_utilities import first
+from iteration_utilities._iteration_utilities import starfilter

 from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
 from cv_analysis.utils.visual_logging import vizlogger
--- a/cv_analysis/table_parsing.py
+++ b/cv_analysis/table_parsing.py
@@ -1,7 +1,3 @@
-from functools import partial
-from itertools import chain, starmap
-from operator import attrgetter
-
 import cv2
 import numpy as np
 from funcy import lmap, lfilter
@@ -130,10 +126,10 @@ def parse_tables(image: np.array, show=False):
    image = preprocess(image)
    image = isolate_vertical_and_horizontal_components(image)
    rects = turn_connected_components_into_rects(image)
-    #print(rects, "\n\n")
+    # print(rects, "\n\n")
    rects = list(map(Rectangle.from_xywh, rects))
-    #print(rects, "\n\n")
+    # print(rects, "\n\n")
    rects = remove_isolated(rects)
-    #print(rects, "\n\n")
-    
+    # print(rects, "\n\n")
+
    return rects
--- a/cv_analysis/utils/connect_rects.py
+++ b/cv_analysis/utils/connect_rects.py
@@ -6,10 +6,14 @@ def is_near_enough(rect_pair, max_gap=14):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]

-    return any([abs(x1 - (x2 + w2)) <= max_gap,
-                abs(x2 - (x1 + w1)) <= max_gap,
-                abs(y2 - (y1 + h1)) <= max_gap,
-                abs(y1 - (y2 + h2)) <= max_gap])
+    return any(
+        [
+            abs(x1 - (x2 + w2)) <= max_gap,
+            abs(x2 - (x1 + w1)) <= max_gap,
+            abs(y2 - (y1 + h1)) <= max_gap,
+            abs(y1 - (y2 + h2)) <= max_gap,
+        ]
+    )


 def is_overlapping(rect_pair):
@@ -23,28 +27,36 @@ def is_overlapping(rect_pair):
 def is_on_same_line(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
-    return any([any([abs(y1 - y2) <= 10,
-                     abs(y1 + h1 - (y2 + h2)) <= 10]),
-                any([y2 <= y1 and y1 + h1 <= y2 + h2,
-                     y1 <= y2 and y2 + h2 <= y1 + h1])])
+    return any(
+        [
+            any([abs(y1 - y2) <= 10, abs(y1 + h1 - (y2 + h2)) <= 10]),
+            any([y2 <= y1 and y1 + h1 <= y2 + h2, y1 <= y2 and y2 + h2 <= y1 + h1]),
+        ]
+    )


 def has_correct_position1(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
-    return any([any([abs(x1 - x2) <= 10,
-                     abs(y1 - y2) <= 10,
-                     abs(x1 + w1 - (x2 + w2)) <= 10,
-                     abs(y1 + h1 - (y2 + h2)) <= 10]),
-                any([y2 <= y1 and y1 + h1 <= y2 + h2,
-                     y1 <= y2 and y2 + h2 <= y1 + h1,
-                     x2 <= x1 and x1 + w1 <= x2 + w2,
-                     x1 <= x2 and x2 + w2 <= x1 + w1])])
+    return any(
+        [
+            any(
+                [abs(x1 - x2) <= 10, abs(y1 - y2) <= 10, abs(x1 + w1 - (x2 + w2)) <= 10, abs(y1 + h1 - (y2 + h2)) <= 10]
+            ),
+            any(
+                [
+                    y2 <= y1 and y1 + h1 <= y2 + h2,
+                    y1 <= y2 and y2 + h2 <= y1 + h1,
+                    x2 <= x1 and x1 + w1 <= x2 + w2,
+                    x1 <= x2 and x2 + w2 <= x1 + w1,
+                ]
+            ),
+        ]
+    )


 def is_related(rect_pair):
-    return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping(
-        rect_pair)
+    return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping(rect_pair)


 def fuse_rects(rect1, rect2):
--- a/cv_analysis/utils/postprocessing.py
+++ b/cv_analysis/utils/postprocessing.py
@@ -1,7 +1,7 @@
-from collections import namedtuple
 from functools import partial
 from itertools import starmap, compress
 from typing import Iterable
+
 from cv_analysis.utils.structures import Rectangle


--- a/scripts/annotate.py
+++ b/scripts/annotate.py
@@ -46,5 +46,6 @@ if __name__ == "__main__":
        from cv_analysis.layout_parsing import parse_layout as analyze
    elif args.type == "figure":
        from cv_analysis.figure_detection.figure_detection import detect_figures
+
        analyze = detect_figures
    annotate_page(page, analyze, draw, name=name, show=args.show)
--- a/scripts/annotate_pdf.py
+++ b/scripts/annotate_pdf.py
@ -1,6 +1,5 @@
 import argparse
 import timeit
-from time import process_time
 from itertools import starmap
 from pathlib import Path

@@ -57,7 +56,6 @@ if __name__ == "__main__":
    t2 = timeit.default_timer()
    save_as_pdf(annotated_pages, args.output_folder, Path(args.pdf_path).stem, args.type)
    t3 = timeit.default_timer()
-    print("[s] opening file and convert pdf pages to images: ", t1-t0)
-    print("[s] analyse and annotate images: ", t2-t1)
-    print("[s] save images as pdf: ", t3-t2)
-
+    print("[s] opening file and convert pdf pages to images: ", t1 - t0)
+    print("[s] analyse and annotate images: ", t2 - t1)
+    print("[s] save images as pdf: ", t3 - t2)
Author	SHA1	Message	Date
cdietrich	e3f06da823	Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into clean_cv	2022-10-06 16:33:36 +02:00
cdietrich	c25c8d764e	black	2022-10-04 13:40:42 +02:00
cdietrich	dcab1e8616	black	2022-09-30 09:59:31 +02:00