reformatting

This commit is contained in:
Isaac Riley 2022-04-26 16:01:57 +02:00
parent 9327fb7231
commit 4ac1cce0e8
8 changed files with 23 additions and 33 deletions

View File

@ -41,4 +41,3 @@ def detect_figures_in_pdf(pdf_path, page_index=1, show=False):
vizlogger.debug(page, "figures03_final.png")
if show:
show_mpl(page)

View File

@ -86,7 +86,7 @@ def annotate_layout_in_pdf(pdf_path, page_index=1, show=False):
if show:
show_mpl(page)
"""
def find_layout_boxes(image: np.array):

View File

@ -51,4 +51,3 @@ def annotate_redactions_in_pdf(pdf_path, page_index=1, show=False):
if show:
show_mpl(page)

View File

@ -26,12 +26,12 @@ def add_external_contours(image, img):
def apply_motion_blur(image: np.array, angle, size=80):
"""Solidifies and slightly extends detected lines.
Args:
image (np.array): page image as array
angle: direction in which to apply blur, 0 or 90
size (int): kernel size; 80 found empirically to work well
Returns:
np.array
@ -50,8 +50,8 @@ def apply_motion_blur(image: np.array, angle, size=80):
def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
"""Identifies and reinforces horizontal and vertical lines in a binary image.
Args:
Args:
img_bin (np.array): array corresponding to single binarized page image
bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables
@ -140,11 +140,11 @@ def preprocess(image: np.array):
def parse_table(image: np.array, show=False):
"""Runs the full table parsing process.
"""Runs the full table parsing process.
Args:
image (np.array): single PDF page, opened as PIL.Image object and converted to a numpy array
Returns:
list: list of rectangles corresponding to table cells
"""
@ -154,10 +154,10 @@ def parse_table(image: np.array, show=False):
return area > 2000 and w > 35 and h > 25
image = preprocess(image)
table_layout_boxes = find_table_layout_boxes(image)
image = isolate_vertical_and_horizontal_components(image, table_layout_boxes)
_, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
stats = np.vstack(list(filter(is_large_enough, stats)))

View File

@ -2,21 +2,21 @@ import os
from cv_analysis.config import CONFIG
from cv_analysis.utils.display import save_mpl
LEVEL = CONFIG.visual_logging.level
OUTPUT_FOLDER = CONFIG.visual_logging.output_folder
class VisualLogger:
def __init__(self):
self.level_is_debug = LEVEL == "DEBUG"
self.output_folder = OUTPUT_FOLDER
def __init__(self, level, output_folder):
self.level = level
self.output_folder = output_folder
if not os.path.exists(self.output_folder):
os.mkdir(self.output_folder)
def debug(self, img, name):
if self.level_is_debug:
if self.level_is_debug():
output_path = os.path.join(self.output_folder, name)
save_mpl(img, output_path)
def level_is_debug(self):
return self.level == "DEBUG"
vizlogger = VisualLogger()
vizlogger = VisualLogger(CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder)

View File

@ -20,7 +20,7 @@ def parse_args():
if __name__ == "__main__":
args = parse_args()
#print(args.show)
# print(args.show)
if args.type == "table":
annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show)
elif args.type == "redaction":

View File

@ -34,7 +34,7 @@ def parse_args():
def main(args):
operations = args.operations.split(",")
for operation in operations:
print("****************************")

View File

@ -87,20 +87,12 @@ def main():
tracemalloc.stop()
def make_annotations(pdf, annotation_function):
results = []
for i, page in enumerate(pdf):
boxes = annotation_function(page)
cells= []
if boxes:
cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes]
results.append({
"page": i,
"pageWidth": page.shape[1],
"pageHeight": page.shape[0],
"cells": cells
})
cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells})
logger.info(str(results))
logger.info(type(results))
output_dict = {"pages": results}
@ -118,7 +110,7 @@ def annotate(annotation_function):
logger.info(f"Processing data.")
pdf, angles = open_pdf(data)
annotations = make_annotations(pdf, annotation_function)
#if CONFIG.deskew.function != "identity":
# if CONFIG.deskew.function != "identity":
# annotations.update({"deskew_angles": angles})
return annotations