From 9327fb7231a7097b2e57d2cefd82fe4c8b1ffeeb Mon Sep 17 00:00:00 2001
From: Isaac Riley <Isaac.Riley@iqser.com>
Date: Fri, 22 Apr 2022 11:22:16 +0200
Subject: [PATCH 1/5] fixed json format and refactored service functions

---
 Dockerfile             |  1 +
 config.yaml            |  2 +-
 scripts/client_mock.py |  2 +-
 src/run_service.py     | 68 +++++++++++++++++++-----------------------
 4 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 054c5d9..19f3b04 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,6 +7,7 @@ WORKDIR /app/service
 
 COPY ./src ./src
 COPY cv_analysis ./cv_analysis
+COPY config.yaml ./config.yaml
 
 RUN python3 -m pip install --upgrade pip
 RUN python3 -m pip install -e .
diff --git a/config.yaml b/config.yaml
index fc6bb42..42bd2e7 100644
--- a/config.yaml
+++ b/config.yaml
@@ -23,5 +23,5 @@ deskew:
 test_dummy: test_dummy
 
 visual_logging:
-  level: $LOGGING_LEVEL_ROOT|DEBUG
+  level: $LOGGING_LEVEL_ROOT|INFO
   output_folder: /tmp/debug/
\ No newline at end of file
diff --git a/scripts/client_mock.py b/scripts/client_mock.py
index ffdd0ab..96ab9b4 100644
--- a/scripts/client_mock.py
+++ b/scripts/client_mock.py
@@ -49,7 +49,7 @@ def main(args):
         elif operation == "layout-parsing":
             response = requests.post("http://127.0.0.1:5000/layout", data=open(args.pdf_path, "rb"))
         else:
-            raise ValueError("{args.operation} is not a valid value.")
+            raise ValueError(f"{args.operation} is not a valid value.")
         response.raise_for_status()
         predictions = response.json()
 
diff --git a/src/run_service.py b/src/run_service.py
index 269c2f4..304e8d8 100644
--- a/src/run_service.py
+++ b/src/run_service.py
@@ -8,11 +8,10 @@ from prometheus_flask_exporter import PrometheusMetrics
 from waitress import serve
 
 from cv_analysis.utils import npconvert
-from cv_analysis.utils.preprocessing import preprocess_pdf_image  # TODO
-from cv_analysis.table_parsing import parse_table  # , detect_tables_in_pdf
-from cv_analysis.redaction_detection import find_redactions  # , detect_redactions_in_pdf
-from cv_analysis.layout_parsing import parse_layout  # , detect_layout_in_pdf #TODO
-from cv_analysis.figure_detection import detect_figures  # , detect_figures_in_pdf #TODO
+from cv_analysis.table_parsing import parse_table
+from cv_analysis.redaction_detection import find_redactions
+from cv_analysis.layout_parsing import parse_layout
+from cv_analysis.figure_detection import detect_figures
 from cv_analysis.utils.logging import logger
 from cv_analysis.utils.preprocessing import open_pdf
 from cv_analysis.config import CONFIG
@@ -44,7 +43,7 @@ def main():
     @metrics.summary("tables_request_time_seconds", "Time spent processing tables request")
     def get_tables():
         start_monitoring()
-        tables = annotate("tables")
+        tables = annotate(parse_table)
         # page_counter.inc(npages)
         return tables
 
@@ -52,7 +51,7 @@ def main():
     @metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request")
     def get_redactions():
         start_monitoring()
-        redactions = annotate("redactions")
+        redactions = annotate(find_redactions)
         # page_counter.inc(npages)
         return redactions
 
@@ -60,7 +59,7 @@ def main():
     @metrics.summary("figures_request_time_seconds", "Time spent processing figures request")
     def get_figures():
         start_monitoring()
-        figures = annotate("figures")
+        figures = annotate(detect_figures)
         # page_counter.inc(npages)
         return figures
 
@@ -68,7 +67,7 @@ def main():
     @metrics.summary("layout_request_time_seconds", "Time spent processing layout request")
     def get_layout():
         start_monitoring()
-        layout = annotate("layout")
+        layout = annotate(parse_layout)
         # page_counter.inc(npages)
         return layout
 
@@ -77,7 +76,6 @@ def main():
         response = "OK"
         return jsonify(response)
 
-    # predictor = initialize_predictor()
     logger.info("<3 Annotator ready.")
 
     mode = CONFIG.webserver.mode
@@ -89,46 +87,40 @@ def main():
     tracemalloc.stop()
 
 
-def apply_annotation_function(annotation_function, page_list):
-    outdict = {}
-    for i, page in enumerate(page_list):
-        results = annotation_function(page)
-        if results:
-            outdict.update({i: results})
-    return outdict
 
-
-def make_annotations(pdf, task):
-    if task == "tables":
-        annotation = {"tables": apply_annotation_function(parse_table, pdf)}
-    elif task == "redactions":
-        annotation = {"redactions": apply_annotation_function(find_redactions, pdf)}
-    elif task == "figures":
-        annotation = {"figures": apply_annotation_function(detect_figures, pdf)}
-    elif task == "layout":
-        annotation = {"layout": apply_annotation_function(parse_layout, pdf)}
-    else:
-        raise ValueError(
-            f"'{task}' is not a valid operation keyword. Valid values include: \
-            \ntables\nredactions\nfigures\nlayout\n"
-        )
-
-    return json.dumps(annotation, default=npconvert)
+def make_annotations(pdf, annotation_function):
+    results = []
+    for i, page in enumerate(pdf):
+        boxes = annotation_function(page)
+        cells= []
+        if boxes:
+            cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes]
+        results.append({
+            "page": i, 
+            "pageWidth": page.shape[1],
+            "pageHeight": page.shape[0],
+            "cells": cells
+            })
+    logger.info(str(results))
+    logger.info(type(results))
+    output_dict = {"pages": results}
+    return jsonify(json.dumps(output_dict, default=npconvert))
 
 
 def get_size(data):
     return round(getsizeof(data) / 1000000, 2)
 
 
-def annotate(task):
+def annotate(annotation_function):
     def inner():
         data = request.data
         logger.info(f"Received data.")
         logger.info(f"Processing data.")
         pdf, angles = open_pdf(data)
-        # npages = len(pdf)
-        annotations = make_annotations(pdf, task)
-        return jsonify({"result": annotations, "deskew_angles": angles})
+        annotations = make_annotations(pdf, annotation_function)
+        #if CONFIG.deskew.function != "identity":
+        #    annotations.update({"deskew_angles": angles})
+        return annotations
 
     try:
         return inner()

From 4ac1cce0e89f8baa84fafca3cc8c959ba3b8591a Mon Sep 17 00:00:00 2001
From: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue, 26 Apr 2022 16:01:57 +0200
Subject: [PATCH 2/5] reformatting

---
 cv_analysis/figure_detection.py     |  1 -
 cv_analysis/layout_parsing.py       |  2 +-
 cv_analysis/redaction_detection.py  |  1 -
 cv_analysis/table_parsing.py        | 18 +++++++++---------
 cv_analysis/utils/visual_logging.py | 16 ++++++++--------
 scripts/annotate.py                 |  2 +-
 scripts/client_mock.py              |  2 +-
 src/run_service.py                  | 14 +++-----------
 8 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/cv_analysis/figure_detection.py b/cv_analysis/figure_detection.py
index 4d50233..38f3f48 100644
--- a/cv_analysis/figure_detection.py
+++ b/cv_analysis/figure_detection.py
@@ -41,4 +41,3 @@ def detect_figures_in_pdf(pdf_path, page_index=1, show=False):
     vizlogger.debug(page, "figures03_final.png")
     if show:
         show_mpl(page)
-    
\ No newline at end of file
diff --git a/cv_analysis/layout_parsing.py b/cv_analysis/layout_parsing.py
index 2d6dcd3..290a3d0 100644
--- a/cv_analysis/layout_parsing.py
+++ b/cv_analysis/layout_parsing.py
@@ -86,7 +86,7 @@ def annotate_layout_in_pdf(pdf_path, page_index=1, show=False):
 
     if show:
         show_mpl(page)
-    
+
 
 """
 def find_layout_boxes(image: np.array):
diff --git a/cv_analysis/redaction_detection.py b/cv_analysis/redaction_detection.py
index f4fe7ca..e81ef53 100644
--- a/cv_analysis/redaction_detection.py
+++ b/cv_analysis/redaction_detection.py
@@ -51,4 +51,3 @@ def annotate_redactions_in_pdf(pdf_path, page_index=1, show=False):
 
     if show:
         show_mpl(page)
-    
\ No newline at end of file
diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py
index a6a6afd..11e64d2 100644
--- a/cv_analysis/table_parsing.py
+++ b/cv_analysis/table_parsing.py
@@ -26,12 +26,12 @@ def add_external_contours(image, img):
 
 def apply_motion_blur(image: np.array, angle, size=80):
     """Solidifies and slightly extends detected lines.
-    
+
     Args:
         image (np.array): page image as array
         angle: direction in which to apply blur, 0 or 90
         size (int): kernel size; 80 found empirically to work well
-        
+
     Returns:
         np.array
 
@@ -50,8 +50,8 @@ def apply_motion_blur(image: np.array, angle, size=80):
 
 def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
     """Identifies and reinforces horizontal and vertical lines in a binary image.
-    
-    Args: 
+
+    Args:
         img_bin (np.array): array corresponding to single binarized page image
         bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables
 
@@ -140,11 +140,11 @@ def preprocess(image: np.array):
 
 
 def parse_table(image: np.array, show=False):
-    """Runs the full table parsing process. 
-    
+    """Runs the full table parsing process.
+
     Args:
         image (np.array): single PDF page, opened as PIL.Image object and converted to a numpy array
-        
+
     Returns:
         list: list of rectangles corresponding to table cells
     """
@@ -154,10 +154,10 @@ def parse_table(image: np.array, show=False):
         return area > 2000 and w > 35 and h > 25
 
     image = preprocess(image)
-    
+
     table_layout_boxes = find_table_layout_boxes(image)
     image = isolate_vertical_and_horizontal_components(image, table_layout_boxes)
-    
+
     _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
 
     stats = np.vstack(list(filter(is_large_enough, stats)))
diff --git a/cv_analysis/utils/visual_logging.py b/cv_analysis/utils/visual_logging.py
index 19bbded..6afbd57 100644
--- a/cv_analysis/utils/visual_logging.py
+++ b/cv_analysis/utils/visual_logging.py
@@ -2,21 +2,21 @@ import os
 from cv_analysis.config import CONFIG
 from cv_analysis.utils.display import save_mpl
 
-LEVEL = CONFIG.visual_logging.level
-OUTPUT_FOLDER = CONFIG.visual_logging.output_folder
-
 
 class VisualLogger:
-    def __init__(self):
-        self.level_is_debug = LEVEL == "DEBUG"
-        self.output_folder = OUTPUT_FOLDER
+    def __init__(self, level, output_folder):
+        self.level = level
+        self.output_folder = output_folder
         if not os.path.exists(self.output_folder):
             os.mkdir(self.output_folder)
 
     def debug(self, img, name):
-        if self.level_is_debug:
+        if self.level_is_debug():
             output_path = os.path.join(self.output_folder, name)
             save_mpl(img, output_path)
+    
+    def level_is_debug(self):
+        return self.level == "DEBUG"
 
 
-vizlogger = VisualLogger()
+vizlogger = VisualLogger(CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder)
diff --git a/scripts/annotate.py b/scripts/annotate.py
index a5d8e20..c92ecf1 100644
--- a/scripts/annotate.py
+++ b/scripts/annotate.py
@@ -20,7 +20,7 @@ def parse_args():
 
 if __name__ == "__main__":
     args = parse_args()
-    #print(args.show)
+    # print(args.show)
     if args.type == "table":
         annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show)
     elif args.type == "redaction":
diff --git a/scripts/client_mock.py b/scripts/client_mock.py
index 96ab9b4..a64fe95 100644
--- a/scripts/client_mock.py
+++ b/scripts/client_mock.py
@@ -34,7 +34,7 @@ def parse_args():
 
 
 def main(args):
-    
+
     operations = args.operations.split(",")
     for operation in operations:
         print("****************************")
diff --git a/src/run_service.py b/src/run_service.py
index 304e8d8..876b96e 100644
--- a/src/run_service.py
+++ b/src/run_service.py
@@ -87,20 +87,12 @@ def main():
     tracemalloc.stop()
 
 
-
 def make_annotations(pdf, annotation_function):
     results = []
     for i, page in enumerate(pdf):
         boxes = annotation_function(page)
-        cells= []
-        if boxes:
-            cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes]
-        results.append({
-            "page": i, 
-            "pageWidth": page.shape[1],
-            "pageHeight": page.shape[0],
-            "cells": cells
-            })
+        cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
+        results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells})
     logger.info(str(results))
     logger.info(type(results))
     output_dict = {"pages": results}
@@ -118,7 +110,7 @@ def annotate(annotation_function):
         logger.info(f"Processing data.")
         pdf, angles = open_pdf(data)
         annotations = make_annotations(pdf, annotation_function)
-        #if CONFIG.deskew.function != "identity":
+        # if CONFIG.deskew.function != "identity":
         #    annotations.update({"deskew_angles": angles})
         return annotations
 

From 41e5f55ea700faca12631c1398875db0459073bb Mon Sep 17 00:00:00 2001
From: Isaac Riley <Isaac.Riley@iqser.com>
Date: Wed, 27 Apr 2022 09:18:57 +0200
Subject: [PATCH 3/5] got changes to table parsing from other branch

---
 cv_analysis/table_parsing.py | 59 +++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py
index 11e64d2..0a6ceed 100644
--- a/cv_analysis/table_parsing.py
+++ b/cv_analysis/table_parsing.py
@@ -11,12 +11,15 @@ from cv_analysis.utils.display import show_mpl
 from cv_analysis.utils.draw import draw_rectangles
 from cv_analysis.utils.post_processing import xywh_to_vecs, xywh_to_vec_rect, adjacent1d
 from cv_analysis.utils.deskew import deskew_histbased
+from cv_analysis.utils.filters import is_large_enough
 from cv_analysis.utils.visual_logging import vizlogger
 from cv_analysis.layout_parsing import parse_layout
 
 
-def add_external_contours(image, img):
-    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+def add_external_contours(image, contour_source_image):
+    contours, _ = cv2.findContours(contour_source_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+    contours = filter(partial(is_large_enough, min_area=5000), contours)
+
     for cnt in contours:
         x, y, w, h = cv2.boundingRect(cnt)
         cv2.rectangle(image, (x, y), (x + w, y + h), 255, 1)
@@ -24,6 +27,16 @@ def add_external_contours(image, img):
     return image
 
 
+def extend_lines():
+    # TODO: not implemented yet; placeholder stub that currently does nothing
+    pass
+
+
+def make_table_block_mask():
+    # TODO: not implemented yet; placeholder stub that currently does nothing
+    pass
+
+
 def apply_motion_blur(image: np.array, angle, size=80):
     """Solidifies and slightly extends detected lines.
 
@@ -48,7 +61,7 @@ def apply_motion_blur(image: np.array, angle, size=80):
     return blurred
 
 
-def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
+def isolate_vertical_and_horizontal_components(img_bin):
     """Identifies and reinforces horizontal and vertical lines in a binary image.
 
     Args:
@@ -65,19 +78,20 @@ def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
     img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h)
     vizlogger.debug(img_bin_h, "tables01_isolate01_img_bin_h.png")
     img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v)
-    vizlogger.debug(img_bin_v, "tables02_isolate02_img_bin_v.png")
-
+    img_lines_raw = img_bin_v | img_bin_h
+    vizlogger.debug(img_lines_raw, "tables02_isolate02_img_bin_v.png")
+    
     kernel_h = np.ones((1, 30), np.uint8)
     kernel_v = np.ones((30, 1), np.uint8)
     img_bin_h = cv2.dilate(img_bin_h, kernel_h, iterations=2)
     vizlogger.debug(img_bin_h, "tables03_isolate03_dilate_h.png")
     img_bin_v = cv2.dilate(img_bin_v, kernel_v, iterations=2)
-    vizlogger.debug(img_bin_v, "tables04_isolate04_dilate_v.png")
+    vizlogger.debug(img_bin_v | img_bin_h, "tables04_isolate04_dilate_v.png")
 
     img_bin_h = apply_motion_blur(img_bin_h, 0)
     vizlogger.debug(img_bin_h, "tables09_isolate05_blur_h.png")
     img_bin_v = apply_motion_blur(img_bin_v, 90)
-    vizlogger.debug(img_bin_v, "tables10_isolate06_blur_v.png")
+    vizlogger.debug(img_bin_v | img_bin_h, "tables10_isolate06_blur_v.png")
 
     img_bin_final = img_bin_h | img_bin_v
     vizlogger.debug(img_bin_final, "tables11_isolate07_final.png")
@@ -86,20 +100,14 @@ def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
     vizlogger.debug(img_bin_final, "tables10_isolate12_threshold.png")
     img_bin_final = cv2.dilate(img_bin_final, np.ones((1, 1), np.uint8), iterations=1)
     vizlogger.debug(img_bin_final, "tables11_isolate13_dilate.png")
-
-    img_bin_final = disconnect_non_existing_cells(img_bin_final, bounding_rects)
-    vizlogger.debug(img_bin_final, "tables12_isolate14_disconnect.png")
+    
+    # add external contours found on the raw lines, i.e. before they were extended by dilation and blurring
+    img_bin_final = add_external_contours(img_bin_final, img_lines_raw)
+    vizlogger.debug(img_bin_final, "tables11_isolate14_contours_added.png")
 
     return img_bin_final
 
 
-def disconnect_non_existing_cells(img_bin, bounding_rects):
-    for rect in bounding_rects:
-        x, y, w, h = rect
-        img_bin = cv2.rectangle(img_bin, (x, y), (x + w, y + h), (0, 0, 0), 5)
-    return img_bin
-
-
 def has_table_shape(rects):
     assert isinstance(rects, list)
 
@@ -156,7 +164,10 @@ def parse_table(image: np.array, show=False):
     image = preprocess(image)
 
     table_layout_boxes = find_table_layout_boxes(image)
-    image = isolate_vertical_and_horizontal_components(image, table_layout_boxes)
+
+    image = isolate_vertical_and_horizontal_components(image)
+    # External contours are now added inside isolate_vertical_and_horizontal_components
+    # (from the raw line image), so no separate add_external_contours call is made here.
 
     _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
 
@@ -177,7 +188,13 @@ def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False):
 
     stats = parse_table(page)
     page = draw_rectangles(page, stats, annotate=True)
-
-    if show:
-        show_mpl(page)
     vizlogger.debug(page, "tables15_final_output.png")
+
+
+def tables_in_image(cropped_image):
+    """Return (True, rects) if parse_table finds rectangles, else (False, None)."""
+    rects = parse_table(cropped_image)
+    # Guard clause: an empty result means no table cells were detected.
+    if len(rects) == 0:
+        return False, None
+    return True, rects

From 81fe5139c2d823e344535293a16dd45f26d314ba Mon Sep 17 00:00:00 2001
From: Isaac Riley <Isaac.Riley@iqser.com>
Date: Wed, 27 Apr 2022 10:52:35 +0200
Subject: [PATCH 4/5] fixed tests, passed (still need to extend tests)

---
 cv_analysis/table_parsing.py          |   2 +-
 cv_analysis/test/test_data/table.json | 460 ++++----------------------
 src/run_service.py                    |   7 -
 3 files changed, 59 insertions(+), 410 deletions(-)

diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py
index 0a6ceed..852df2b 100644
--- a/cv_analysis/table_parsing.py
+++ b/cv_analysis/table_parsing.py
@@ -174,7 +174,7 @@ def parse_table(image: np.array, show=False):
     stats = np.vstack(list(filter(is_large_enough, stats)))
     rects = stats[:, :-1][2:]
 
-    return list(rects)
+    return list(map(list, rects))
 
 
 def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False):
diff --git a/cv_analysis/test/test_data/table.json b/cv_analysis/test/test_data/table.json
index 009b24e..5e78d0e 100644
--- a/cv_analysis/test/test_data/table.json
+++ b/cv_analysis/test/test_data/table.json
@@ -1,406 +1,62 @@
 {
     "0": [
-      [
-        211,
-        415,
-        367,
-        29
-      ],
-      [
-        581,
-        415,
-        417,
-        29
-      ],
-      [
-        1001,
-        415,
-        406,
-        29
-      ],
-      [
-        211,
-        447,
-        367,
-        47
-      ],
-      [
-        581,
-        447,
-        417,
-        47
-      ],
-      [
-        1001,
-        447,
-        406,
-        47
-      ],
-      [
-        211,
-        497,
-        367,
-        47
-      ],
-      [
-        580,
-        497,
-        418,
-        47
-      ],
-      [
-        1001,
-        497,
-        406,
-        47
-      ],
-      [
-        211,
-        547,
-        367,
-        47
-      ],
-      [
-        580,
-        547,
-        418,
-        47
-      ],
-      [
-        1001,
-        547,
-        406,
-        47
-      ],
-      [
-        211,
-        597,
-        367,
-        47
-      ],
-      [
-        581,
-        597,
-        417,
-        47
-      ],
-      [
-        1001,
-        597,
-        406,
-        48
-      ],
-      [
-        212,
-        647,
-        366,
-        48
-      ],
-      [
-        581,
-        647,
-        417,
-        48
-      ],
-      [
-        1001,
-        647,
-        406,
-        48
-      ],
-      [
-        581,
-        697,
-        417,
-        47
-      ],
-      [
-        1001,
-        697,
-        407,
-        48
-      ],
-      [
-        212,
-        698,
-        366,
-        47
-      ],
-      [
-        211,
-        747,
-        367,
-        48
-      ],
-      [
-        581,
-        747,
-        417,
-        48
-      ],
-      [
-        1001,
-        748,
-        407,
-        47
-      ],
-      [
-        211,
-        798,
-        367,
-        47
-      ],
-      [
-        581,
-        798,
-        417,
-        47
-      ],
-      [
-        1001,
-        798,
-        407,
-        47
-      ],
-      [
-        212,
-        848,
-        366,
-        47
-      ],
-      [
-        581,
-        848,
-        417,
-        47
-      ],
-      [
-        1001,
-        848,
-        407,
-        48
-      ],
-      [
-        212,
-        898,
-        366,
-        48
-      ],
-      [
-        581,
-        898,
-        417,
-        48
-      ],
-      [
-        1001,
-        898,
-        407,
-        48
-      ],
-      [
-        212,
-        949,
-        366,
-        33
-      ],
-      [
-        581,
-        949,
-        827,
-        33
-      ],
-      [
-        462,
-        1163,
-        368,
-        29
-      ],
-      [
-        833,
-        1163,
-        404,
-        29
-      ],
-      [
-        462,
-        1195,
-        368,
-        48
-      ],
-      [
-        833,
-        1195,
-        404,
-        48
-      ],
-      [
-        462,
-        1245,
-        368,
-        48
-      ],
-      [
-        833,
-        1245,
-        404,
-        47
-      ],
-      [
-        462,
-        1296,
-        368,
-        47
-      ],
-      [
-        833,
-        1296,
-        404,
-        47
-      ],
-      [
-        462,
-        1346,
-        368,
-        47
-      ],
-      [
-        833,
-        1346,
-        404,
-        47
-      ],
-      [
-        462,
-        1396,
-        368,
-        47
-      ],
-      [
-        834,
-        1396,
-        403,
-        47
-      ],
-      [
-        462,
-        1446,
-        368,
-        48
-      ],
-      [
-        833,
-        1446,
-        404,
-        48
-      ],
-      [
-        462,
-        1496,
-        368,
-        48
-      ],
-      [
-        833,
-        1496,
-        404,
-        48
-      ],
-      [
-        462,
-        1547,
-        368,
-        47
-      ],
-      [
-        834,
-        1547,
-        403,
-        47
-      ],
-      [
-        462,
-        1597,
-        368,
-        48
-      ],
-      [
-        834,
-        1597,
-        403,
-        47
-      ],
-      [
-        462,
-        1647,
-        368,
-        48
-      ],
-      [
-        833,
-        1647,
-        404,
-        48
-      ],
-      [
-        462,
-        1698,
-        368,
-        47
-      ],
-      [
-        833,
-        1698,
-        404,
-        47
-      ],
-      [
-        462,
-        1748,
-        368,
-        47
-      ],
-      [
-        834,
-        1748,
-        403,
-        47
-      ],
-      [
-        462,
-        1798,
-        368,
-        47
-      ],
-      [
-        834,
-        1798,
-        403,
-        47
-      ],
-      [
-        462,
-        1848,
-        368,
-        48
-      ],
-      [
-        834,
-        1848,
-        403,
-        48
-      ],
-      [
-        462,
-        1899,
-        369,
-        33
-      ],
-      [
-        832,
-        1899,
-        405,
-        33
-      ]
+      [211, 447, 367, 47], 
+      [581, 447, 417, 47], 
+      [1001, 447, 406, 47], 
+      [211, 497, 367, 47], 
+      [580, 497, 418, 47], 
+      [1001, 497, 406, 47], 
+      [211, 547, 367, 47], 
+      [580, 547, 418, 47], 
+      [1001, 547, 406, 47], 
+      [211, 597, 367, 47], 
+      [581, 597, 417, 47], 
+      [1001, 597, 406, 48], 
+      [212, 647, 366, 48], 
+      [581, 647, 417, 48], 
+      [1001, 647, 406, 48], 
+      [581, 697, 417, 47], 
+      [1001, 697, 407, 48], 
+      [212, 698, 366, 47], 
+      [211, 747, 367, 48], 
+      [581, 747, 417, 48], 
+      [1001, 748, 407, 47], 
+      [211, 798, 367, 47], 
+      [581, 798, 417, 47], 
+      [1001, 798, 407, 47], 
+      [212, 848, 366, 47], 
+      [581, 848, 417, 47], 
+      [1001, 848, 407, 48], 
+      [212, 898, 366, 48], 
+      [581, 898, 417, 48], 
+      [1001, 898, 407, 48], 
+      [462, 1195, 368, 48], 
+      [833, 1195, 404, 48], 
+      [462, 1245, 368, 48], 
+      [833, 1245, 404, 47], 
+      [462, 1296, 368, 47], 
+      [833, 1296, 404, 47], 
+      [462, 1346, 368, 47], 
+      [833, 1346, 404, 47], 
+      [462, 1396, 368, 47], 
+      [834, 1396, 403, 47], 
+      [462, 1446, 368, 48], 
+      [833, 1446, 404, 48], 
+      [462, 1496, 368, 48], 
+      [833, 1496, 404, 48], 
+      [462, 1547, 368, 47], 
+      [834, 1547, 403, 47], 
+      [462, 1597, 368, 48], 
+      [834, 1597, 403, 47], 
+      [462, 1647, 368, 48], 
+      [833, 1647, 404, 48], 
+      [462, 1698, 368, 47], 
+      [833, 1698, 404, 47], 
+      [462, 1748, 368, 47], 
+      [834, 1748, 403, 47], 
+      [462, 1798, 368, 47], 
+      [834, 1798, 403, 47], 
+      [462, 1848, 368, 48], 
+      [834, 1848, 403, 48]
     ]
   }
\ No newline at end of file
diff --git a/src/run_service.py b/src/run_service.py
index 876b96e..86454eb 100644
--- a/src/run_service.py
+++ b/src/run_service.py
@@ -25,7 +25,6 @@ def suppress_user_warnings():
 
 def main():
     file_counter = Counter("cv_analysis_file_counter", "count processed files")
-    # page_counter = Counter("cv_analysis_page_counter", "count pages from processed files")
     ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")
 
     def start_monitoring():
@@ -44,7 +43,6 @@ def main():
     def get_tables():
         start_monitoring()
         tables = annotate(parse_table)
-        # page_counter.inc(npages)
         return tables
 
     @app.route("/redactions", methods=["POST"])
@@ -52,7 +50,6 @@ def main():
     def get_redactions():
         start_monitoring()
         redactions = annotate(find_redactions)
-        # page_counter.inc(npages)
         return redactions
 
     @app.route("/figures", methods=["POST"])
@@ -60,7 +57,6 @@ def main():
     def get_figures():
         start_monitoring()
         figures = annotate(detect_figures)
-        # page_counter.inc(npages)
         return figures
 
     @app.route("/layout", methods=["POST"])
@@ -68,7 +64,6 @@ def main():
     def get_layout():
         start_monitoring()
         layout = annotate(parse_layout)
-        # page_counter.inc(npages)
         return layout
 
     @app.route("/status", methods=["GET"])
@@ -93,8 +88,6 @@ def make_annotations(pdf, annotation_function):
         boxes = annotation_function(page)
         cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
         results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells})
-    logger.info(str(results))
-    logger.info(type(results))
     output_dict = {"pages": results}
     return jsonify(json.dumps(output_dict, default=npconvert))
 

From 21d1f087c84d4eb61e8ac77a691cdc19fd67617c Mon Sep 17 00:00:00 2001
From: Isaac Riley <Isaac.Riley@iqser.com>
Date: Wed, 27 Apr 2022 11:27:38 +0200
Subject: [PATCH 5/5] fixed show parameter, for development only

---
 cv_analysis/table_parsing.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py
index 852df2b..5d4b522 100644
--- a/cv_analysis/table_parsing.py
+++ b/cv_analysis/table_parsing.py
@@ -189,6 +189,8 @@ def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False):
     stats = parse_table(page)
     page = draw_rectangles(page, stats, annotate=True)
     vizlogger.debug(page, "tables15_final_output.png")
+    if show:
+        show_mpl(page)
 
 
 def tables_in_image(cropped_image):