diff --git a/cv_analysis/figure_detection.py b/cv_analysis/figure_detection.py index 4d50233..38f3f48 100644 --- a/cv_analysis/figure_detection.py +++ b/cv_analysis/figure_detection.py @@ -41,4 +41,3 @@ def detect_figures_in_pdf(pdf_path, page_index=1, show=False): vizlogger.debug(page, "figures03_final.png") if show: show_mpl(page) - \ No newline at end of file diff --git a/cv_analysis/layout_parsing.py b/cv_analysis/layout_parsing.py index 2d6dcd3..290a3d0 100644 --- a/cv_analysis/layout_parsing.py +++ b/cv_analysis/layout_parsing.py @@ -86,7 +86,7 @@ def annotate_layout_in_pdf(pdf_path, page_index=1, show=False): if show: show_mpl(page) - + """ def find_layout_boxes(image: np.array): diff --git a/cv_analysis/redaction_detection.py b/cv_analysis/redaction_detection.py index f4fe7ca..e81ef53 100644 --- a/cv_analysis/redaction_detection.py +++ b/cv_analysis/redaction_detection.py @@ -51,4 +51,3 @@ def annotate_redactions_in_pdf(pdf_path, page_index=1, show=False): if show: show_mpl(page) - \ No newline at end of file diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py index a6a6afd..11e64d2 100644 --- a/cv_analysis/table_parsing.py +++ b/cv_analysis/table_parsing.py @@ -26,12 +26,12 @@ def add_external_contours(image, img): def apply_motion_blur(image: np.array, angle, size=80): """Solidifies and slightly extends detected lines. - + Args: image (np.array): page image as array angle: direction in which to apply blur, 0 or 90 size (int): kernel size; 80 found empirically to work well - + Returns: np.array @@ -50,8 +50,8 @@ def apply_motion_blur(image: np.array, angle, size=80): def isolate_vertical_and_horizontal_components(img_bin, bounding_rects): """Identifies and reinforces horizontal and vertical lines in a binary image. - - Args: + + Args: img_bin (np.array): array corresponding to single binarized page image bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables @@ -140,11 +140,11 @@ def preprocess(image: np.array): def parse_table(image: np.array, show=False): - """Runs the full table parsing process. - + """Runs the full table parsing process. + Args: image (np.array): single PDF page, opened as PIL.Image object and converted to a numpy array - + Returns: list: list of rectangles corresponding to table cells """ @@ -154,10 +154,10 @@ def parse_table(image: np.array, show=False): return area > 2000 and w > 35 and h > 25 image = preprocess(image) - + table_layout_boxes = find_table_layout_boxes(image) image = isolate_vertical_and_horizontal_components(image, table_layout_boxes) - + _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S) stats = np.vstack(list(filter(is_large_enough, stats))) diff --git a/cv_analysis/utils/visual_logging.py b/cv_analysis/utils/visual_logging.py index 19bbded..6afbd57 100644 --- a/cv_analysis/utils/visual_logging.py +++ b/cv_analysis/utils/visual_logging.py @@ -2,21 +2,21 @@ import os from cv_analysis.config import CONFIG from cv_analysis.utils.display import save_mpl -LEVEL = CONFIG.visual_logging.level -OUTPUT_FOLDER = CONFIG.visual_logging.output_folder - class VisualLogger: - def __init__(self): - self.level_is_debug = LEVEL == "DEBUG" - self.output_folder = OUTPUT_FOLDER + def __init__(self, level, output_folder): + self.level = level + self.output_folder = output_folder if not os.path.exists(self.output_folder): os.mkdir(self.output_folder) def debug(self, img, name): - if self.level_is_debug: + if self.level_is_debug(): output_path = os.path.join(self.output_folder, name) save_mpl(img, output_path) + + def level_is_debug(self): + return self.level == "DEBUG" -vizlogger = VisualLogger() +vizlogger = VisualLogger(CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder) diff --git a/scripts/annotate.py b/scripts/annotate.py index a5d8e20..c92ecf1 100644 --- a/scripts/annotate.py +++ b/scripts/annotate.py @@ -20,7 +20,7 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - #print(args.show) + # print(args.show) if args.type == "table": annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show) elif args.type == "redaction": diff --git a/scripts/client_mock.py b/scripts/client_mock.py index 96ab9b4..a64fe95 100644 --- a/scripts/client_mock.py +++ b/scripts/client_mock.py @@ -34,7 +34,7 @@ def parse_args(): def main(args): - + operations = args.operations.split(",") for operation in operations: print("****************************") diff --git a/src/run_service.py b/src/run_service.py index 304e8d8..876b96e 100644 --- a/src/run_service.py +++ b/src/run_service.py @@ -87,20 +87,12 @@ def main(): tracemalloc.stop() - def make_annotations(pdf, annotation_function): results = [] for i, page in enumerate(pdf): boxes = annotation_function(page) - cells= [] - if boxes: - cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes] - results.append({ - "page": i, - "pageWidth": page.shape[1], - "pageHeight": page.shape[0], - "cells": cells - }) + cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes] + results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells}) logger.info(str(results)) logger.info(type(results)) output_dict = {"pages": results} @@ -118,7 +110,7 @@ def annotate(annotation_function): logger.info(f"Processing data.") pdf, angles = open_pdf(data) annotations = make_annotations(pdf, annotation_function) - #if CONFIG.deskew.function != "identity": + # if CONFIG.deskew.function != "identity": # annotations.update({"deskew_angles": angles}) return annotations