reformatting
This commit is contained in:
parent
9327fb7231
commit
4ac1cce0e8
@ -41,4 +41,3 @@ def detect_figures_in_pdf(pdf_path, page_index=1, show=False):
|
||||
vizlogger.debug(page, "figures03_final.png")
|
||||
if show:
|
||||
show_mpl(page)
|
||||
|
||||
@ -86,7 +86,7 @@ def annotate_layout_in_pdf(pdf_path, page_index=1, show=False):
|
||||
|
||||
if show:
|
||||
show_mpl(page)
|
||||
|
||||
|
||||
|
||||
"""
|
||||
def find_layout_boxes(image: np.array):
|
||||
|
||||
@ -51,4 +51,3 @@ def annotate_redactions_in_pdf(pdf_path, page_index=1, show=False):
|
||||
|
||||
if show:
|
||||
show_mpl(page)
|
||||
|
||||
@ -26,12 +26,12 @@ def add_external_contours(image, img):
|
||||
|
||||
def apply_motion_blur(image: np.array, angle, size=80):
|
||||
"""Solidifies and slightly extends detected lines.
|
||||
|
||||
|
||||
Args:
|
||||
image (np.array): page image as array
|
||||
angle: direction in which to apply blur, 0 or 90
|
||||
size (int): kernel size; 80 found empirically to work well
|
||||
|
||||
|
||||
Returns:
|
||||
np.array
|
||||
|
||||
@ -50,8 +50,8 @@ def apply_motion_blur(image: np.array, angle, size=80):
|
||||
|
||||
def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
|
||||
"""Identifies and reinforces horizontal and vertical lines in a binary image.
|
||||
|
||||
Args:
|
||||
|
||||
Args:
|
||||
img_bin (np.array): array corresponding to single binarized page image
|
||||
bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables
|
||||
|
||||
@ -140,11 +140,11 @@ def preprocess(image: np.array):
|
||||
|
||||
|
||||
def parse_table(image: np.array, show=False):
|
||||
"""Runs the full table parsing process.
|
||||
|
||||
"""Runs the full table parsing process.
|
||||
|
||||
Args:
|
||||
image (np.array): single PDF page, opened as PIL.Image object and converted to a numpy array
|
||||
|
||||
|
||||
Returns:
|
||||
list: list of rectangles corresponding to table cells
|
||||
"""
|
||||
@ -154,10 +154,10 @@ def parse_table(image: np.array, show=False):
|
||||
return area > 2000 and w > 35 and h > 25
|
||||
|
||||
image = preprocess(image)
|
||||
|
||||
|
||||
table_layout_boxes = find_table_layout_boxes(image)
|
||||
image = isolate_vertical_and_horizontal_components(image, table_layout_boxes)
|
||||
|
||||
|
||||
_, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
|
||||
|
||||
stats = np.vstack(list(filter(is_large_enough, stats)))
|
||||
|
||||
@ -2,21 +2,21 @@ import os
|
||||
from cv_analysis.config import CONFIG
|
||||
from cv_analysis.utils.display import save_mpl
|
||||
|
||||
LEVEL = CONFIG.visual_logging.level
|
||||
OUTPUT_FOLDER = CONFIG.visual_logging.output_folder
|
||||
|
||||
|
||||
class VisualLogger:
|
||||
def __init__(self):
|
||||
self.level_is_debug = LEVEL == "DEBUG"
|
||||
self.output_folder = OUTPUT_FOLDER
|
||||
def __init__(self, level, output_folder):
|
||||
self.level = level
|
||||
self.output_folder = output_folder
|
||||
if not os.path.exists(self.output_folder):
|
||||
os.mkdir(self.output_folder)
|
||||
|
||||
def debug(self, img, name):
|
||||
if self.level_is_debug:
|
||||
if self.level_is_debug():
|
||||
output_path = os.path.join(self.output_folder, name)
|
||||
save_mpl(img, output_path)
|
||||
|
||||
def level_is_debug(self):
|
||||
return self.level == "DEBUG"
|
||||
|
||||
|
||||
vizlogger = VisualLogger()
|
||||
vizlogger = VisualLogger(CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder)
|
||||
|
||||
@ -20,7 +20,7 @@ def parse_args():
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
#print(args.show)
|
||||
# print(args.show)
|
||||
if args.type == "table":
|
||||
annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show)
|
||||
elif args.type == "redaction":
|
||||
|
||||
@ -34,7 +34,7 @@ def parse_args():
|
||||
|
||||
|
||||
def main(args):
|
||||
|
||||
|
||||
operations = args.operations.split(",")
|
||||
for operation in operations:
|
||||
print("****************************")
|
||||
|
||||
@ -87,20 +87,12 @@ def main():
|
||||
tracemalloc.stop()
|
||||
|
||||
|
||||
|
||||
def make_annotations(pdf, annotation_function):
|
||||
results = []
|
||||
for i, page in enumerate(pdf):
|
||||
boxes = annotation_function(page)
|
||||
cells= []
|
||||
if boxes:
|
||||
cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes]
|
||||
results.append({
|
||||
"page": i,
|
||||
"pageWidth": page.shape[1],
|
||||
"pageHeight": page.shape[0],
|
||||
"cells": cells
|
||||
})
|
||||
cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
|
||||
results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells})
|
||||
logger.info(str(results))
|
||||
logger.info(type(results))
|
||||
output_dict = {"pages": results}
|
||||
@ -118,7 +110,7 @@ def annotate(annotation_function):
|
||||
logger.info(f"Processing data.")
|
||||
pdf, angles = open_pdf(data)
|
||||
annotations = make_annotations(pdf, annotation_function)
|
||||
#if CONFIG.deskew.function != "identity":
|
||||
# if CONFIG.deskew.function != "identity":
|
||||
# annotations.update({"deskew_angles": angles})
|
||||
return annotations
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user