reformatting

2022-04-26 16:01:57 +02:00 · 2022-04-26 16:01:57 +02:00 · 4ac1cce0e8
commit 4ac1cce0e8
parent 9327fb7231
8 changed files with 23 additions and 33 deletions
--- a/cv_analysis/figure_detection.py
+++ b/cv_analysis/figure_detection.py
@@ -41,4 +41,3 @@ def detect_figures_in_pdf(pdf_path, page_index=1, show=False):
    vizlogger.debug(page, "figures03_final.png")
    if show:
        show_mpl(page)
-    
--- a/cv_analysis/layout_parsing.py
+++ b/cv_analysis/layout_parsing.py
@@ -86,7 +86,7 @@ def annotate_layout_in_pdf(pdf_path, page_index=1, show=False):

    if show:
        show_mpl(page)
-    
+

 """
 def find_layout_boxes(image: np.array):
--- a/cv_analysis/redaction_detection.py
+++ b/cv_analysis/redaction_detection.py
@@ -51,4 +51,3 @@ def annotate_redactions_in_pdf(pdf_path, page_index=1, show=False):

    if show:
        show_mpl(page)
-    
--- a/cv_analysis/table_parsing.py
+++ b/cv_analysis/table_parsing.py
@@ -26,12 +26,12 @@ def add_external_contours(image, img):

 def apply_motion_blur(image: np.array, angle, size=80):
    """Solidifies and slightly extends detected lines.
-    
+
    Args:
        image (np.array): page image as array
        angle: direction in which to apply blur, 0 or 90
        size (int): kernel size; 80 found empirically to work well
-        
+
    Returns:
        np.array

@@ -50,8 +50,8 @@ def apply_motion_blur(image: np.array, angle, size=80):

 def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
    """Identifies and reinforces horizontal and vertical lines in a binary image.
-    
-    Args: 
+
+    Args:
        img_bin (np.array): array corresponding to single binarized page image
        bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables

@@ -140,11 +140,11 @@ def preprocess(image: np.array):


 def parse_table(image: np.array, show=False):
-    """Runs the full table parsing process. 
-    
+    """Runs the full table parsing process.
+
    Args:
        image (np.array): single PDF page, opened as PIL.Image object and converted to a numpy array
-        
+
    Returns:
        list: list of rectangles corresponding to table cells
    """
@@ -154,10 +154,10 @@ def parse_table(image: np.array, show=False):
        return area > 2000 and w > 35 and h > 25

    image = preprocess(image)
-    
+
    table_layout_boxes = find_table_layout_boxes(image)
    image = isolate_vertical_and_horizontal_components(image, table_layout_boxes)
-    
+
    _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)

    stats = np.vstack(list(filter(is_large_enough, stats)))
--- a/cv_analysis/utils/visual_logging.py
+++ b/cv_analysis/utils/visual_logging.py
@@ -2,21 +2,21 @@ import os
 from cv_analysis.config import CONFIG
 from cv_analysis.utils.display import save_mpl

-LEVEL = CONFIG.visual_logging.level
-OUTPUT_FOLDER = CONFIG.visual_logging.output_folder
-

 class VisualLogger:
-    def __init__(self):
-        self.level_is_debug = LEVEL == "DEBUG"
-        self.output_folder = OUTPUT_FOLDER
+    def __init__(self, level, output_folder):
+        self.level = level
+        self.output_folder = output_folder
        if not os.path.exists(self.output_folder):
            os.mkdir(self.output_folder)

    def debug(self, img, name):
-        if self.level_is_debug:
+        if self.level_is_debug():
            output_path = os.path.join(self.output_folder, name)
            save_mpl(img, output_path)
+    
+    def level_is_debug(self):
+        return self.level == "DEBUG"


-vizlogger = VisualLogger()
+vizlogger = VisualLogger(CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder)
--- a/scripts/annotate.py
+++ b/scripts/annotate.py
@@ -20,7 +20,7 @@ def parse_args():

 if __name__ == "__main__":
    args = parse_args()
-    #print(args.show)
+    # print(args.show)
    if args.type == "table":
        annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show)
    elif args.type == "redaction":
--- a/scripts/client_mock.py
+++ b/scripts/client_mock.py
@@ -34,7 +34,7 @@ def parse_args():


 def main(args):
-    
+
    operations = args.operations.split(",")
    for operation in operations:
        print("****************************")
--- a/src/run_service.py
+++ b/src/run_service.py
@@ -87,20 +87,12 @@ def main():
    tracemalloc.stop()


-
 def make_annotations(pdf, annotation_function):
    results = []
    for i, page in enumerate(pdf):
        boxes = annotation_function(page)
-        cells= []
-        if boxes:
-            cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes]
-        results.append({
-            "page": i, 
-            "pageWidth": page.shape[1],
-            "pageHeight": page.shape[0],
-            "cells": cells
-            })
+        cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
+        results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells})
    logger.info(str(results))
    logger.info(type(results))
    output_dict = {"pages": results}
@@ -118,7 +110,7 @@ def annotate(annotation_function):
        logger.info(f"Processing data.")
        pdf, angles = open_pdf(data)
        annotations = make_annotations(pdf, annotation_function)
-        #if CONFIG.deskew.function != "identity":
+        # if CONFIG.deskew.function != "identity":
        #    annotations.update({"deskew_angles": angles})
        return annotations