diff --git a/config.yaml b/config.yaml index fc6bb42..c739249 100644 --- a/config.yaml +++ b/config.yaml @@ -23,5 +23,5 @@ deskew: test_dummy: test_dummy visual_logging: - level: $LOGGING_LEVEL_ROOT|DEBUG + level: $LOGGING_LEVEL_ROOT|INFO # NOTHNG > INFO > DEBUG > ALL output_folder: /tmp/debug/ \ No newline at end of file diff --git a/cv_analysis/fig_detection_with_layout.py b/cv_analysis/fig_detection_with_layout.py deleted file mode 100644 index ce1d71b..0000000 --- a/cv_analysis/fig_detection_with_layout.py +++ /dev/null @@ -1,58 +0,0 @@ -from cv_analysis.layout_parsing import annotate_layout_in_pdf -from cv_analysis.figure_detection import detect_figures -from cv_analysis.table_parsing import tables_in_image, parse_table -from cv_analysis.utils.draw import draw_rectangles -from cv_analysis.utils.display import show_mpl -from cv_analysis.utils.visual_logging import vizlogger -#from PIL import Image - - - -def cut_out_content_structures(layout_rects, page): - large_rects = [] - small_rects = [] - for x, y, w, h in layout_rects: - rect = (x, y, w, h) - if w * h >= 75000: - cropped_page = page[y:(y + h), x:(x + w)] - large_rects.append([rect, cropped_page]) - else: - cropped_page = page[y:(y + h), x:(x + w)] - small_rects.append([rect, cropped_page]) - return large_rects, small_rects - - -def parse_content_structures(page, large_rects, small_rects): - for coordinates, cropped_image in large_rects: - figure_rects = detect_figures(cropped_image) - if len(figure_rects) == 0: # text - page = draw_rectangles(page, [coordinates], color=(0, 255, 0), annotate=True) - elif len(parse_table(cropped_image)) > 0: - #elif tables_in_image(cropped_image)[0]: # table - stats = parse_table(cropped_image) - cropped_image = draw_rectangles(cropped_image, stats, color=(255, 0, 0), annotate=True) - x,y,w,h = coordinates - page[y:y+h, x:x+w] = cropped_image - else: # figure - page = draw_rectangles(page, [coordinates], color=(0, 0, 255), annotate=True) - - # for coordinates, cropped_image in small_rects: - # figure_rects = detect_figures(cropped_image) - # if len(figure_rects) == 0 and len(list(find_primary_text_regions(cropped_image))) > 0: - # page = draw_rectangles(page, [coordinates], color=(0, 255, 0), annotate=True) - # else: - # page = draw_rectangles(page, [coordinates], color=(0, 255, 255), annotate=True) - return page - - -def detect_figures_with_layout_parsing(pdf_path, page_index=1, show=False): - layout_rects, page = annotate_layout_in_pdf(pdf_path, page_index, return_rects=True) - big_structures, small_structures = cut_out_content_structures(layout_rects, page) - page = parse_content_structures(page, big_structures, small_structures) - vizlogger.debug(page, "figures03_final.png") - if show: - show_mpl(page) - else: - return page - - diff --git a/cv_analysis/figure_detection.py b/cv_analysis/figure_detection.py index 32db8d5..8f5dcde 100644 --- a/cv_analysis/figure_detection.py +++ b/cv_analysis/figure_detection.py @@ -1,7 +1,7 @@ import cv2 import numpy as np from pdf2image import pdf2image -import pandas as pd +#import pandas as pd from PIL import Image import timeit from os import path @@ -11,6 +11,7 @@ from cv_analysis.utils.display import show_mpl from cv_analysis.utils.draw import draw_rectangles from cv_analysis.utils.post_processing import remove_included from cv_analysis.utils.filters import is_large_enough, has_acceptable_format +from cv_analysis.utils.structures import Rectangle from cv_analysis.utils.text import remove_primary_text_regions from cv_analysis.utils.visual_logging import vizlogger @@ -32,69 +33,4 @@ def detect_figures(image: np.array): rects = map(cv2.boundingRect, cnts) rects = remove_included(rects) - return list(rects) - - -def detect_figures_in_pdf(pdf_path, page_index=1, show=False): - - page = pdf2image.convert_from_path(pdf_path, dpi=300, first_page=page_index + 1, last_page=page_index + 1)[0] - page = np.array(page) - - redaction_contours = detect_figures(page) - page = draw_rectangles(page, redaction_contours) - vizlogger.debug(page, "figures03_final.png") - if show: - show_mpl(page) - return page - - -def detect_figures_in_test_files(): - def save_as_pdf(pages): - p1, p = pages[0], pages[1:] - out_pdf_path = "/home/lillian/ocr_docs/output_files/fig_detection_pdf.pdf" - p1.save( - out_pdf_path, "PDF", resolution=150.0, save_all=True, append_images=p - ) - path = "/home/lillian/ocr_docs/" - ex_pages = pd.read_csv(path+"/metadata/metadata2.csv") - pages_detected = [] - - t0 = timeit.default_timer() - for name, page_nr in zip(ex_pages.pdf_name, ex_pages.page): - page = pdf2image.convert_from_path(path + "/original/" + name, dpi=300, first_page=page_nr, last_page=page_nr)[0] - page = np.array(page) - redaction_contours = detect_figures(page) - page = draw_rectangles(page, redaction_contours) - pages_detected.append(Image.fromarray(page)) - print(timeit.default_timer()-t0) - - save_as_pdf(pages_detected) - - -def detect_figures_in_png(pdf_path, show=False): - - page = Image.open(pdf_path) - page = np.array(page) - - redaction_contours = detect_figures(page) - page = draw_rectangles(page, redaction_contours) - vizlogger.debug(page, "figures03_final.png") - if show: - show_mpl(page) - return page - - -def detect_figures_in_test_files_png(): - file_name = pd.read_csv(METADATA_TESTFILES) - pages = [] - t0 = timeit.default_timer() - for name in file_name.image_name: - page = detect_figures_in_png(path.join(PNG_FOR_TESTING, name+".png")) - pages.append(Image.fromarray(page)) - t1 = timeit.default_timer() - print(t1-t0) - p1, p = pages[0], pages[1:] - out_pdf_path = path.join(PNG_FIGURES_DETECTED, "fig_detectes.pdf") - p1.save( - out_pdf_path, "PDF", resolution=300.0, save_all=True, append_images=p - ) \ No newline at end of file + return list(map(Rectangle.from_xywh, rects)) diff --git a/cv_analysis/layout_parsing.py b/cv_analysis/layout_parsing.py index b6ae567..d4a80d6 100644 --- a/cv_analysis/layout_parsing.py +++ b/cv_analysis/layout_parsing.py @@ -4,10 +4,11 @@ from operator import __and__ import cv2 import numpy as np -from pdf2image import pdf2image +#from pdf2image import pdf2image -from cv_analysis.utils.display import show_mpl -from cv_analysis.utils.draw import draw_rectangles +#from cv_analysis.utils.display import show_mpl +#from cv_analysis.utils.draw import draw_rectangles +from cv_analysis.utils.structures import Rectangle from cv_analysis.utils.post_processing import remove_overlapping, remove_included, has_no_parent from cv_analysis.utils.visual_logging import vizlogger @@ -36,17 +37,17 @@ def parse_layout(image: np.array): if len(image_.shape) > 2: image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY) - #vizlogger.debug(image_, "layout01_start.png") + vizlogger.debug(image_, "layout01_start.png") image_ = cv2.GaussianBlur(image_, (7, 7), 0) - #vizlogger.debug(image_, "layout02_blur.png") + vizlogger.debug(image_, "layout02_blur.png") thresh = cv2.threshold(image_, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] vizlogger.debug(image_, "layout03_theshold.png") kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) - #vizlogger.debug(kernel, "layout04_kernel.png") + vizlogger.debug(kernel, "layout04_kernel.png") dilate = cv2.dilate(thresh, kernel, iterations=4) - #vizlogger.debug(dilate, "layout05_dilate.png") + vizlogger.debug(dilate, "layout05_dilate.png") rects = list(find_segments(dilate)) @@ -55,16 +56,16 @@ def parse_layout(image: np.array): x, y, w, h = rect cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1) cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7) - #vizlogger.debug(image, "layout06_rectangles.png") + vizlogger.debug(image, "layout06_rectangles.png") _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY) - #vizlogger.debug(image, "layout07_threshold.png") + vizlogger.debug(image, "layout07_threshold.png") image = ~image - #vizlogger.debug(image, "layout08_inverse.png") + vizlogger.debug(image, "layout08_inverse.png") if len(image.shape) > 2: image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - #vizlogger.debug(image, "layout09_convertcolor.png") + vizlogger.debug(image, "layout09_convertcolor.png") rects = find_segments(image) # <- End of meta detection @@ -72,73 +73,23 @@ def parse_layout(image: np.array): rects = remove_included(rects) rects = remove_overlapping(rects) - return list(rects) + return list(map(Rectangle.from_xywh, rects)) -def annotate_layout_in_pdf(pdf_path, page_index=1, return_rects=False, show=False): +# def annotate_layout_in_pdf(page, return_rects=False, show=False): - page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] - page = np.array(page) +# #page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] +# #page = np.array(page) - rects = parse_layout(page) +# rects = parse_layout(page) - if return_rects: - return rects, page - elif show: - page = draw_rectangles(page, rects) - vizlogger.debug(page, "layout10_output.png") - show_mpl(page) - else: - page = draw_rectangles(page, rects) - return page +# if return_rects: +# return rects, page +# elif show: +# page = draw_rectangles(page, rects) +# vizlogger.debug(page, "layout10_output.png") +# show_mpl(page) +# else: +# page = draw_rectangles(page, rects) +# return page - -""" -def find_layout_boxes(image: np.array): - - if len(image.shape) > 2: - image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - image = cv2.GaussianBlur(image, (5, 5), 1) - image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY)[1] - img_bin = ~image - - line_min_width = 10 - kernel_h = np.ones((10, line_min_width), np.uint8) - kernel_v = np.ones((line_min_width, 10), np.uint8) - - img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h) - img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v) - - img_bin_final = img_bin_h | img_bin_v - - contours = cv2.findContours(img_bin_final, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - contours = imutils.grab_contours(contours) - - for c in contours: - peri = cv2.arcLength(c, True) - approx = cv2.approxPolyDP(c, 0.04 * peri, True) - yield cv2.boundingRect(approx) - - -def annotate_layout_boxes(image, rects): - for rect in rects: - (x, y, w, h) = rect - cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2) - - return image - - -def annotate_layout_in_pdf(pdf_path, page_index=1): - - page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] - page = np.array(page) - - layout_boxes = find_layout_boxes(page) - page = annotate_layout_boxes(page, layout_boxes) - - fig, ax = plt.subplots(1, 1) - fig.set_size_inches(20, 20) - ax.imshow(page) - plt.show() - -""" diff --git a/cv_analysis/redaction_detection.py b/cv_analysis/redaction_detection.py index e81ef53..a633bee 100644 --- a/cv_analysis/redaction_detection.py +++ b/cv_analysis/redaction_detection.py @@ -40,14 +40,14 @@ def find_redactions(image: np.array, min_normalized_area=200000): return [] -def annotate_redactions_in_pdf(pdf_path, page_index=1, show=False): +# def annotate_redactions_in_pdf(page, show=False): - page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] - page = np.array(page) +# #page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] +# #page = np.array(page) - redaction_contours = find_redactions(page) - page = draw_contours(page, redaction_contours) - vizlogger.debug(page, "redactions05_output.png") +# redaction_contours = find_redactions(page) +# page = draw_contours(page, redaction_contours) +# vizlogger.debug(page, "redactions05_output.png") - if show: - show_mpl(page) +# if show: +# show_mpl(page) diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py index 2b6344b..7b6b4a4 100644 --- a/cv_analysis/table_parsing.py +++ b/cv_analysis/table_parsing.py @@ -2,19 +2,16 @@ from functools import partial from itertools import chain, starmap from operator import attrgetter - - import cv2 import numpy as np -from pdf2image import pdf2image - - -from cv_analysis.utils.display import show_mpl -from cv_analysis.utils.draw import draw_rectangles +#from pdf2image import pdf2image +#from cv_analysis.utils.display import show_mpl +#from cv_analysis.utils.draw import draw_rectangles from cv_analysis.utils.post_processing import xywh_to_vecs, xywh_to_vec_rect, adjacent1d -from cv_analysis.utils.deskew import deskew_histbased, deskew -from cv_analysis.utils.filters import is_large_enough +#from cv_analysis.utils.deskew import deskew_histbased, deskew +#from cv_analysis.utils.filters import is_large_enough +from cv_analysis.utils.structures import Rectangle from cv_analysis.utils.visual_logging import vizlogger from cv_analysis.layout_parsing import parse_layout @@ -138,7 +135,7 @@ def find_table_layout_boxes(image: np.array): for box in layout_boxes: (x, y, w, h) = box if w * h >= 100000: - table_boxes.append(box) + table_boxes.append(Rectangle.from_xywh(box)) return table_boxes @@ -148,7 +145,7 @@ def preprocess(image: np.array): return ~image -def parse_table(image: np.array, show=False): +def parse_tables(image: np.array, show=False): """Runs the full table parsing process. Args: @@ -164,42 +161,33 @@ def parse_table(image: np.array, show=False): image = preprocess(image) - table_layout_boxes = find_table_layout_boxes(image) + #table_layout_boxes = find_table_layout_boxes(image) image = isolate_vertical_and_horizontal_components(image) # image = add_external_contours(image, image) - # vizlogger.debug(image, "external_contours_added.png") + vizlogger.debug(image, "external_contours_added.png") _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S) stats = np.vstack(list(filter(is_large_enough, stats))) rects = stats[:, :-1][2:] - - return list(map(list, rects)) + + #print(rects) + return list(map(Rectangle.from_xywh, rects)) -def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False): - """ """ - page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] - page = np.array(page) - if show: - show_mpl(page) - if deskew: - page, _ = deskew_histbased(page) - - stats = parse_table(page) - page = draw_rectangles(page, stats, annotate=True) - vizlogger.debug(page, "tables15_final_output.png") - if show: - show_mpl(page) - - -def tables_in_image(cropped_image): - table_rects = parse_table(cropped_image) - - if len(table_rects) > 0: - return True, table_rects - else: - return False, None +# def annotate_tables_in_pdf(page, page_index=0, deskew=False, show=False): +# """ """ +# #page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] +# #page = np.array(page) +# if show: +# show_mpl(page) +# if deskew: +# page, _ = deskew_histbased(page) +# stats = parse_tables(page) +# page = draw_rectangles(page, stats, annotate=True) +# vizlogger.debug(page, "tables15_final_output.png") +# if show: +# show_mpl(page) diff --git a/cv_analysis/test/test_data/ref.json b/cv_analysis/test/test_data/ref.json new file mode 100644 index 0000000..222d778 --- /dev/null +++ b/cv_analysis/test/test_data/ref.json @@ -0,0 +1,54 @@ +{ + "images": [ + { + "name": "test1.png", + "source_document": "Amended Residue analytical method for the determ.pdf", + "page": 7 + }, + { + "name": "test2.png", + "source_document": "Amended Residue analytical method for the determ.pdf", + "page": 39 + }, + { + "name": "test3.png", + "source_document": "VV-857853.pdf", + "page": 8 + }, + { + "name": "test4.png", + "source_document": "Sulphur_RAR_09_Volume_3CA_B-7_2021-04-09.pdf", + "page": 25 + }, + { + "name": "test5.png", + "source_document": "Sulphur_RAR_09_Volume_3CA_B-7_2021-04-09.pdf", + "page": 35 + }, + { + "name": "test6.png", + "source_document": "VV-128279.pdf", + "page": 49 + }, + { + "name": "test7.png", + "source_document": "VV-376573.pdf", + "page": 86 + }, + { + "name": "test8.png", + "source_document": "VV-377325.pdf", + "page": 218 + }, + { + "name": "test9.png", + "source_document": "VV-857853.pdf", + "page": 10 + }, + { + "name": "test10.png", + "source_document": "VV-334103.pdf", + "page": 28 + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/table.json b/cv_analysis/test/test_data/table.json index 5e78d0e..44e82ea 100644 --- a/cv_analysis/test/test_data/table.json +++ b/cv_analysis/test/test_data/table.json @@ -1,62 +1,359 @@ { - "0": [ - [211, 447, 367, 47], - [581, 447, 417, 47], - [1001, 447, 406, 47], - [211, 497, 367, 47], - [580, 497, 418, 47], - [1001, 497, 406, 47], - [211, 547, 367, 47], - [580, 547, 418, 47], - [1001, 547, 406, 47], - [211, 597, 367, 47], - [581, 597, 417, 47], - [1001, 597, 406, 48], - [212, 647, 366, 48], - [581, 647, 417, 48], - [1001, 647, 406, 48], - [581, 697, 417, 47], - [1001, 697, 407, 48], - [212, 698, 366, 47], - [211, 747, 367, 48], - [581, 747, 417, 48], - [1001, 748, 407, 47], - [211, 798, 367, 47], - [581, 798, 417, 47], - [1001, 798, 407, 47], - [212, 848, 366, 47], - [581, 848, 417, 47], - [1001, 848, 407, 48], - [212, 898, 366, 48], - [581, 898, 417, 48], - [1001, 898, 407, 48], - [462, 1195, 368, 48], - [833, 1195, 404, 48], - [462, 1245, 368, 48], - [833, 1245, 404, 47], - [462, 1296, 368, 47], - [833, 1296, 404, 47], - [462, 1346, 368, 47], - [833, 1346, 404, 47], - [462, 1396, 368, 47], - [834, 1396, 403, 47], - [462, 1446, 368, 48], - [833, 1446, 404, 48], - [462, 1496, 368, 48], - [833, 1496, 404, 48], - [462, 1547, 368, 47], - [834, 1547, 403, 47], - [462, 1597, 368, 48], - [834, 1597, 403, 47], - [462, 1647, 368, 48], - [833, 1647, 404, 48], - [462, 1698, 368, 47], - [833, 1698, 404, 47], - [462, 1748, 368, 47], - [834, 1748, 403, 47], - [462, 1798, 368, 47], - [834, 1798, 403, 47], - [462, 1848, 368, 48], - [834, 1848, 403, 48] - ] - } \ No newline at end of file + "pages": [ + { + "page": 0, + "pageWidth": 2346, + "pageHeight": 1663, + "cells": [ + { + "x": 211, + "y": 447, + "width": 367, + "height": 47 + }, + { + "x": 581, + "y": 447, + "width": 417, + "height": 47 + }, + { + "x": 1001, + "y": 447, + "width": 406, + "height": 47 + }, + { + "x": 211, + "y": 497, + "width": 367, + "height": 47 + }, + { + "x": 580, + "y": 497, + "width": 418, + "height": 47 + }, + { + "x": 1001, + "y": 497, + "width": 406, + "height": 47 + }, + { + "x": 211, + "y": 547, + "width": 367, + "height": 47 + }, + { + "x": 580, + "y": 547, + "width": 418, + "height": 47 + }, + { + "x": 1001, + "y": 547, + "width": 406, + "height": 47 + }, + { + "x": 211, + "y": 597, + "width": 367, + "height": 47 + }, + { + "x": 581, + "y": 597, + "width": 417, + "height": 47 + }, + { + "x": 1001, + "y": 597, + "width": 406, + "height": 48 + }, + { + "x": 212, + "y": 647, + "width": 366, + "height": 48 + }, + { + "x": 581, + "y": 647, + "width": 417, + "height": 48 + }, + { + "x": 1001, + "y": 647, + "width": 406, + "height": 48 + }, + { + "x": 581, + "y": 697, + "width": 417, + "height": 47 + }, + { + "x": 1001, + "y": 697, + "width": 407, + "height": 48 + }, + { + "x": 212, + "y": 698, + "width": 366, + "height": 47 + }, + { + "x": 211, + "y": 747, + "width": 367, + "height": 48 + }, + { + "x": 581, + "y": 747, + "width": 417, + "height": 48 + }, + { + "x": 1001, + "y": 748, + "width": 407, + "height": 47 + }, + { + "x": 211, + "y": 798, + "width": 367, + "height": 47 + }, + { + "x": 581, + "y": 798, + "width": 417, + "height": 47 + }, + { + "x": 1001, + "y": 798, + "width": 407, + "height": 47 + }, + { + "x": 212, + "y": 848, + "width": 366, + "height": 47 + }, + { + "x": 581, + "y": 848, + "width": 417, + "height": 47 + }, + { + "x": 1001, + "y": 848, + "width": 407, + "height": 48 + }, + { + "x": 212, + "y": 898, + "width": 366, + "height": 48 + }, + { + "x": 581, + "y": 898, + "width": 417, + "height": 48 + }, + { + "x": 1001, + "y": 898, + "width": 407, + "height": 48 + }, + { + "x": 462, + "y": 1195, + "width": 368, + "height": 48 + }, + { + "x": 833, + "y": 1195, + "width": 404, + "height": 48 + }, + { + "x": 462, + "y": 1245, + "width": 368, + "height": 48 + }, + { + "x": 833, + "y": 1245, + "width": 404, + "height": 47 + }, + { + "x": 462, + "y": 1296, + "width": 368, + "height": 47 + }, + { + "x": 833, + "y": 1296, + "width": 404, + "height": 47 + }, + { + "x": 462, + "y": 1346, + "width": 368, + "height": 47 + }, + { + "x": 833, + "y": 1346, + "width": 404, + "height": 47 + }, + { + "x": 462, + "y": 1396, + "width": 368, + "height": 47 + }, + { + "x": 834, + "y": 1396, + "width": 403, + "height": 47 + }, + { + "x": 462, + "y": 1446, + "width": 368, + "height": 48 + }, + { + "x": 833, + "y": 1446, + "width": 404, + "height": 48 + }, + { + "x": 462, + "y": 1496, + "width": 368, + "height": 48 + }, + { + "x": 833, + "y": 1496, + "width": 404, + "height": 48 + }, + { + "x": 462, + "y": 1547, + "width": 368, + "height": 47 + }, + { + "x": 834, + "y": 1547, + "width": 403, + "height": 47 + }, + { + "x": 462, + "y": 1597, + "width": 368, + "height": 48 + }, + { + "x": 834, + "y": 1597, + "width": 403, + "height": 47 + }, + { + "x": 462, + "y": 1647, + "width": 368, + "height": 48 + }, + { + "x": 833, + "y": 1647, + "width": 404, + "height": 48 + }, + { + "x": 462, + "y": 1698, + "width": 368, + "height": 47 + }, + { + "x": 833, + "y": 1698, + "width": 404, + "height": 47 + }, + { + "x": 462, + "y": 1748, + "width": 368, + "height": 47 + }, + { + "x": 834, + "y": 1748, + "width": 403, + "height": 47 + }, + { + "x": 462, + "y": 1798, + "width": 368, + "height": 47 + }, + { + "x": 834, + "y": 1798, + "width": 403, + "height": 47 + }, + { + "x": 462, + "y": 1848, + "width": 368, + "height": 48 + }, + { + "x": 834, + "y": 1848, + "width": 403, + "height": 48 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test1.json b/cv_analysis/test/test_data/test1.json new file mode 100644 index 0000000..b2fdf55 --- /dev/null +++ b/cv_analysis/test/test_data/test1.json @@ -0,0 +1,191 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2481, + "pageHeight": 3509, + "cells": [ + { + "x": 604, + "y": 400, + "width": 399, + "height": 142 + }, + { + "x": 1006, + "y": 400, + "width": 49, + "height": 142 + }, + { + "x": 1058, + "y": 400, + "width": 1215, + "height": 142 + }, + { + "x": 604, + "y": 545, + "width": 399, + "height": 83 + }, + { + "x": 1006, + "y": 545, + "width": 49, + "height": 83 + }, + { + "x": 1058, + "y": 545, + "width": 1215, + "height": 83 + }, + { + "x": 604, + "y": 631, + "width": 399, + "height": 84 + }, + { + "x": 1006, + "y": 631, + "width": 49, + "height": 84 + }, + { + "x": 1058, + "y": 631, + "width": 1215, + "height": 84 + }, + { + "x": 604, + "y": 718, + "width": 399, + "height": 84 + }, + { + "x": 1006, + "y": 718, + "width": 49, + "height": 84 + }, + { + "x": 1058, + "y": 718, + "width": 1215, + "height": 84 + }, + { + "x": 604, + "y": 805, + "width": 399, + "height": 804 + }, + { + "x": 1006, + "y": 805, + "width": 49, + "height": 804 + }, + { + "x": 1058, + "y": 805, + "width": 1215, + "height": 804 + }, + { + "x": 604, + "y": 1724, + "width": 399, + "height": 84 + }, + { + "x": 1006, + "y": 1724, + "width": 49, + "height": 84 + }, + { + "x": 1058, + "y": 1724, + "width": 1215, + "height": 84 + }, + { + "x": 604, + "y": 1811, + "width": 399, + "height": 83 + }, + { + "x": 1006, + "y": 1811, + "width": 49, + "height": 83 + }, + { + "x": 1058, + "y": 1811, + "width": 1215, + "height": 83 + }, + { + "x": 604, + "y": 1897, + "width": 399, + "height": 84 + }, + { + "x": 1006, + "y": 1897, + "width": 49, + "height": 84 + }, + { + "x": 1058, + "y": 1897, + "width": 1215, + "height": 84 + }, + { + "x": 604, + "y": 1984, + "width": 399, + "height": 84 + }, + { + "x": 1006, + "y": 1984, + "width": 49, + "height": 84 + }, + { + "x": 1058, + "y": 1984, + "width": 1215, + "height": 84 + }, + { + "x": 604, + "y": 2071, + "width": 399, + "height": 813 + }, + { + "x": 1006, + "y": 2071, + "width": 49, + "height": 813 + }, + { + "x": 1058, + "y": 2071, + "width": 1215, + "height": 813 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test1.png b/cv_analysis/test/test_data/test1.png new file mode 100644 index 0000000..a1f0d3b Binary files /dev/null and b/cv_analysis/test/test_data/test1.png differ diff --git a/cv_analysis/test/test_data/test10.json b/cv_analysis/test/test_data/test10.json new file mode 100644 index 0000000..d52d3e7 --- /dev/null +++ b/cv_analysis/test/test_data/test10.json @@ -0,0 +1,851 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2480, + "pageHeight": 3509, + "cells": [ + { + "x": 1828, + "y": 1667, + "width": 382, + "height": 55 + }, + { + "x": 1477, + "y": 1670, + "width": 349, + "height": 55 + }, + { + "x": 1126, + "y": 1673, + "width": 349, + "height": 54 + }, + { + "x": 776, + "y": 1676, + "width": 348, + "height": 54 + }, + { + "x": 425, + "y": 1678, + "width": 348, + "height": 54 + }, + { + "x": 1828, + "y": 1722, + "width": 382, + "height": 55 + }, + { + "x": 1477, + "y": 1725, + "width": 349, + "height": 55 + }, + { + "x": 1126, + "y": 1728, + "width": 349, + "height": 54 + }, + { + "x": 776, + "y": 1730, + "width": 348, + "height": 55 + }, + { + "x": 425, + "y": 1733, + "width": 349, + "height": 54 + }, + { + "x": 1828, + "y": 1777, + "width": 382, + "height": 54 + }, + { + "x": 1478, + "y": 1780, + "width": 348, + "height": 54 + }, + { + "x": 1126, + "y": 1783, + "width": 349, + "height": 53 + }, + { + "x": 776, + "y": 1786, + "width": 348, + "height": 53 + }, + { + "x": 426, + "y": 1788, + "width": 348, + "height": 53 + }, + { + "x": 1828, + "y": 1832, + "width": 382, + "height": 54 + }, + { + "x": 1478, + "y": 1835, + "width": 348, + "height": 54 + }, + { + "x": 1126, + "y": 1837, + "width": 349, + "height": 55 + }, + { + "x": 776, + "y": 1840, + "width": 348, + "height": 54 + }, + { + "x": 426, + "y": 1843, + "width": 348, + "height": 53 + }, + { + "x": 1829, + "y": 1887, + "width": 381, + "height": 54 + }, + { + "x": 1478, + "y": 1890, + "width": 348, + "height": 53 + }, + { + "x": 1126, + "y": 1892, + "width": 349, + "height": 54 + }, + { + "x": 776, + "y": 1895, + "width": 348, + "height": 53 + }, + { + "x": 426, + "y": 1898, + "width": 348, + "height": 53 + }, + { + "x": 1829, + "y": 1941, + "width": 381, + "height": 54 + }, + { + "x": 1478, + "y": 1944, + "width": 348, + "height": 54 + }, + { + "x": 1126, + "y": 1947, + "width": 349, + "height": 53 + }, + { + "x": 776, + "y": 1949, + "width": 348, + "height": 54 + }, + { + "x": 426, + "y": 1952, + "width": 348, + "height": 53 + }, + { + "x": 1829, + "y": 1995, + "width": 381, + "height": 55 + }, + { + "x": 1478, + "y": 1999, + "width": 348, + "height": 53 + }, + { + "x": 1127, + "y": 2001, + "width": 348, + "height": 54 + }, + { + "x": 776, + "y": 2004, + "width": 348, + "height": 53 + }, + { + "x": 426, + "y": 2006, + "width": 348, + "height": 54 + }, + { + "x": 1829, + "y": 2050, + "width": 382, + "height": 54 + }, + { + "x": 1478, + "y": 2053, + "width": 348, + "height": 54 + }, + { + "x": 1127, + "y": 2056, + "width": 349, + "height": 54 + }, + { + "x": 776, + "y": 2058, + "width": 348, + "height": 54 + }, + { + "x": 426, + "y": 2061, + "width": 348, + "height": 54 + }, + { + "x": 1829, + "y": 2105, + "width": 382, + "height": 54 + }, + { + "x": 1478, + "y": 2108, + "width": 349, + "height": 54 + }, + { + "x": 1127, + "y": 2110, + "width": 349, + "height": 54 + }, + { + "x": 776, + "y": 2113, + "width": 348, + "height": 54 + }, + { + "x": 426, + "y": 2116, + "width": 348, + "height": 54 + }, + { + "x": 1829, + "y": 2159, + "width": 382, + "height": 55 + }, + { + "x": 1478, + "y": 2164, + "width": 349, + "height": 52 + }, + { + "x": 1127, + "y": 2166, + "width": 349, + "height": 53 + }, + { + "x": 776, + "y": 2169, + "width": 348, + "height": 53 + }, + { + "x": 426, + "y": 2172, + "width": 348, + "height": 52 + }, + { + "x": 1829, + "y": 2215, + "width": 382, + "height": 55 + }, + { + "x": 1478, + "y": 2218, + "width": 349, + "height": 54 + }, + { + "x": 1127, + "y": 2221, + "width": 349, + "height": 54 + }, + { + "x": 776, + "y": 2224, + "width": 348, + "height": 53 + }, + { + "x": 426, + "y": 2226, + "width": 348, + "height": 54 + }, + { + "x": 1830, + "y": 2271, + "width": 381, + "height": 50 + }, + { + "x": 1479, + "y": 2274, + "width": 348, + "height": 49 + }, + { + "x": 1127, + "y": 2276, + "width": 349, + "height": 50 + }, + { + "x": 776, + "y": 2279, + "width": 348, + "height": 49 + }, + { + "x": 426, + "y": 2282, + "width": 348, + "height": 49 + }, + { + "x": 1830, + "y": 2322, + "width": 383, + "height": 53 + }, + { + "x": 1480, + "y": 2325, + "width": 348, + "height": 53 + }, + { + "x": 1127, + "y": 2328, + "width": 350, + "height": 52 + }, + { + "x": 776, + "y": 2330, + "width": 348, + "height": 53 + }, + { + "x": 426, + "y": 2333, + "width": 348, + "height": 52 + }, + { + "x": 1831, + "y": 2377, + "width": 382, + "height": 52 + }, + { + "x": 1480, + "y": 2380, + "width": 348, + "height": 52 + }, + { + "x": 1127, + "y": 2382, + "width": 350, + "height": 52 + }, + { + "x": 776, + "y": 2385, + "width": 348, + "height": 51 + }, + { + "x": 426, + "y": 2388, + "width": 348, + "height": 51 + }, + { + "x": 1831, + "y": 2430, + "width": 382, + "height": 53 + }, + { + "x": 1480, + "y": 2433, + "width": 348, + "height": 53 + }, + { + "x": 1127, + "y": 2436, + "width": 350, + "height": 53 + }, + { + "x": 777, + "y": 2438, + "width": 348, + "height": 53 + }, + { + "x": 426, + "y": 2441, + "width": 348, + "height": 52 + }, + { + "x": 1831, + "y": 2485, + "width": 383, + "height": 53 + }, + { + "x": 1480, + "y": 2488, + "width": 348, + "height": 52 + }, + { + "x": 1127, + "y": 2490, + "width": 350, + "height": 53 + }, + { + "x": 777, + "y": 2493, + "width": 348, + "height": 52 + }, + { + "x": 427, + "y": 2495, + "width": 348, + "height": 53 + }, + { + "x": 1831, + "y": 2539, + "width": 383, + "height": 53 + }, + { + "x": 1480, + "y": 2542, + "width": 349, + "height": 53 + }, + { + "x": 1127, + "y": 2545, + "width": 350, + "height": 52 + }, + { + "x": 777, + "y": 2547, + "width": 348, + "height": 53 + }, + { + "x": 427, + "y": 2550, + "width": 348, + "height": 52 + }, + { + "x": 1831, + "y": 2593, + "width": 383, + "height": 54 + }, + { + "x": 1480, + "y": 2596, + "width": 349, + "height": 54 + }, + { + "x": 1127, + "y": 2599, + "width": 351, + "height": 53 + }, + { + "x": 777, + "y": 2601, + "width": 348, + "height": 54 + }, + { + "x": 427, + "y": 2604, + "width": 348, + "height": 53 + }, + { + "x": 1831, + "y": 2649, + "width": 383, + "height": 53 + }, + { + "x": 1480, + "y": 2652, + "width": 349, + "height": 52 + }, + { + "x": 1128, + "y": 2654, + "width": 350, + "height": 53 + }, + { + "x": 777, + "y": 2657, + "width": 348, + "height": 52 + }, + { + "x": 427, + "y": 2659, + "width": 348, + "height": 53 + }, + { + "x": 1832, + "y": 2703, + "width": 382, + "height": 54 + }, + { + "x": 1480, + "y": 2706, + "width": 349, + "height": 53 + }, + { + "x": 1128, + "y": 2709, + "width": 350, + "height": 53 + }, + { + "x": 778, + "y": 2711, + "width": 347, + "height": 53 + }, + { + "x": 427, + "y": 2714, + "width": 348, + "height": 53 + }, + { + "x": 1832, + "y": 2758, + "width": 382, + "height": 53 + }, + { + "x": 1481, + "y": 2761, + "width": 348, + "height": 53 + }, + { + "x": 1128, + "y": 2764, + "width": 350, + "height": 52 + }, + { + "x": 778, + "y": 2766, + "width": 348, + "height": 53 + }, + { + "x": 427, + "y": 2769, + "width": 348, + "height": 52 + }, + { + "x": 1832, + "y": 2812, + "width": 382, + "height": 55 + }, + { + "x": 1481, + "y": 2816, + "width": 349, + "height": 53 + }, + { + "x": 1128, + "y": 2818, + "width": 351, + "height": 54 + }, + { + "x": 778, + "y": 2820, + "width": 348, + "height": 54 + }, + { + "x": 428, + "y": 2823, + "width": 347, + "height": 53 + }, + { + "x": 1832, + "y": 2868, + "width": 382, + "height": 54 + }, + { + "x": 1481, + "y": 2871, + "width": 349, + "height": 53 + }, + { + "x": 1128, + "y": 2873, + "width": 351, + "height": 54 + }, + { + "x": 778, + "y": 2876, + "width": 348, + "height": 53 + }, + { + "x": 428, + "y": 2878, + "width": 348, + "height": 53 + }, + { + "x": 1832, + "y": 2923, + "width": 382, + "height": 53 + }, + { + "x": 1481, + "y": 2926, + "width": 349, + "height": 52 + }, + { + "x": 1128, + "y": 2928, + "width": 351, + "height": 53 + }, + { + "x": 778, + "y": 2931, + "width": 348, + "height": 52 + }, + { + "x": 428, + "y": 2933, + "width": 348, + "height": 52 + }, + { + "x": 1832, + "y": 2978, + "width": 382, + "height": 53 + }, + { + "x": 1481, + "y": 2980, + "width": 349, + "height": 53 + }, + { + "x": 1129, + "y": 2983, + "width": 350, + "height": 53 + }, + { + "x": 778, + "y": 2985, + "width": 348, + "height": 53 + }, + { + "x": 428, + "y": 2987, + "width": 348, + "height": 53 + }, + { + "x": 1832, + "y": 3032, + "width": 382, + "height": 53 + }, + { + "x": 1481, + "y": 3035, + "width": 349, + "height": 52 + }, + { + "x": 1129, + "y": 3038, + "width": 350, + "height": 52 + }, + { + "x": 779, + "y": 3040, + "width": 348, + "height": 52 + }, + { + "x": 428, + "y": 3042, + "width": 349, + "height": 53 + }, + { + "x": 1832, + "y": 3086, + "width": 382, + "height": 55 + }, + { + "x": 1481, + "y": 3089, + "width": 349, + "height": 54 + }, + { + "x": 1129, + "y": 3092, + "width": 350, + "height": 54 + }, + { + "x": 779, + "y": 3094, + "width": 348, + "height": 54 + }, + { + "x": 429, + "y": 3097, + "width": 348, + "height": 53 + }, + { + "x": 1832, + "y": 3141, + "width": 382, + "height": 55 + }, + { + "x": 1481, + "y": 3144, + "width": 349, + "height": 55 + }, + { + "x": 1129, + "y": 3147, + "width": 350, + "height": 54 + }, + { + "x": 779, + "y": 3149, + "width": 349, + "height": 55 + }, + { + "x": 429, + "y": 3152, + "width": 348, + "height": 54 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test10.png b/cv_analysis/test/test_data/test10.png new file mode 100644 index 0000000..9af83ed Binary files /dev/null and b/cv_analysis/test/test_data/test10.png differ diff --git a/cv_analysis/test/test_data/test2.json b/cv_analysis/test/test_data/test2.json new file mode 100644 index 0000000..1deb655 --- /dev/null +++ b/cv_analysis/test/test_data/test2.json @@ -0,0 +1,839 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2481, + "pageHeight": 3509, + "cells": [ + { + "x": 327, + "y": 609, + "width": 353, + "height": 161 + }, + { + "x": 683, + "y": 609, + "width": 311, + "height": 161 + }, + { + "x": 997, + "y": 609, + "width": 531, + "height": 161 + }, + { + "x": 1531, + "y": 609, + "width": 247, + "height": 161 + }, + { + "x": 1781, + "y": 609, + "width": 246, + "height": 161 + }, + { + "x": 2030, + "y": 609, + "width": 246, + "height": 161 + }, + { + "x": 327, + "y": 773, + "width": 353, + "height": 272 + }, + { + "x": 683, + "y": 773, + "width": 311, + "height": 65 + }, + { + "x": 997, + "y": 773, + "width": 531, + "height": 65 + }, + { + "x": 1531, + "y": 773, + "width": 247, + "height": 65 + }, + { + "x": 1781, + "y": 773, + "width": 246, + "height": 65 + }, + { + "x": 2030, + "y": 773, + "width": 246, + "height": 65 + }, + { + "x": 683, + "y": 841, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 841, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 841, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 841, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 841, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 910, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 910, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 910, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 910, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 910, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 979, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 979, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 979, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 979, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 979, + "width": 246, + "height": 66 + }, + { + "x": 327, + "y": 1047, + "width": 353, + "height": 273 + }, + { + "x": 683, + "y": 1048, + "width": 311, + "height": 65 + }, + { + "x": 997, + "y": 1048, + "width": 531, + "height": 65 + }, + { + "x": 1531, + "y": 1048, + "width": 247, + "height": 65 + }, + { + "x": 1781, + "y": 1048, + "width": 246, + "height": 65 + }, + { + "x": 2030, + "y": 1048, + "width": 246, + "height": 65 + }, + { + "x": 683, + "y": 1116, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 1116, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 1116, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 1116, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 1116, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 1185, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 1185, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 1185, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 1185, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 1185, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 1254, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 1254, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 1254, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 1254, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 1254, + "width": 246, + "height": 66 + }, + { + "x": 327, + "y": 1322, + "width": 353, + "height": 273 + }, + { + "x": 683, + "y": 1323, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 1323, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 1323, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 1323, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 1323, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 1392, + "width": 311, + "height": 65 + }, + { + "x": 997, + "y": 1392, + "width": 531, + "height": 65 + }, + { + "x": 1531, + "y": 1392, + "width": 247, + "height": 65 + }, + { + "x": 1781, + "y": 1392, + "width": 246, + "height": 65 + }, + { + "x": 2030, + "y": 1392, + "width": 246, + "height": 65 + }, + { + "x": 683, + "y": 1460, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 1460, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 1460, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 1460, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 1460, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 1529, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 1529, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 1529, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 1529, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 1529, + "width": 246, + "height": 66 + }, + { + "x": 327, + "y": 1849, + "width": 353, + "height": 161 + }, + { + "x": 683, + "y": 1849, + "width": 311, + "height": 161 + }, + { + "x": 997, + "y": 1849, + "width": 531, + "height": 161 + }, + { + "x": 1531, + "y": 1849, + "width": 247, + "height": 161 + }, + { + "x": 1781, + "y": 1849, + "width": 246, + "height": 161 + }, + { + "x": 2030, + "y": 1849, + "width": 246, + "height": 161 + }, + { + "x": 327, + "y": 2013, + "width": 353, + "height": 272 + }, + { + "x": 683, + "y": 2013, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2013, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2013, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2013, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2013, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 2082, + "width": 311, + "height": 65 + }, + { + "x": 997, + "y": 2082, + "width": 531, + "height": 65 + }, + { + "x": 1531, + "y": 2082, + "width": 247, + "height": 65 + }, + { + "x": 1781, + "y": 2082, + "width": 246, + "height": 65 + }, + { + "x": 2030, + "y": 2082, + "width": 246, + "height": 65 + }, + { + "x": 683, + "y": 2150, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2150, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2150, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2150, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2150, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 2219, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2219, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2219, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2219, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2219, + "width": 246, + "height": 66 + }, + { + "x": 327, + "y": 2287, + "width": 353, + "height": 273 + }, + { + "x": 683, + "y": 2288, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2288, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2288, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2288, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2288, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 2357, + "width": 311, + "height": 65 + }, + { + "x": 997, + "y": 2357, + "width": 531, + "height": 65 + }, + { + "x": 1531, + "y": 2357, + "width": 247, + "height": 65 + }, + { + "x": 1781, + "y": 2357, + "width": 246, + "height": 65 + }, + { + "x": 2030, + "y": 2357, + "width": 246, + "height": 65 + }, + { + "x": 683, + "y": 2425, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2425, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2425, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2425, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2425, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 2494, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2494, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2494, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2494, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2494, + "width": 246, + "height": 66 + }, + { + "x": 327, + "y": 2562, + "width": 353, + "height": 273 + }, + { + "x": 683, + "y": 2563, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2563, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2563, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2563, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2563, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 2632, + "width": 311, + "height": 65 + }, + { + "x": 997, + "y": 2632, + "width": 531, + "height": 65 + }, + { + "x": 1531, + "y": 2632, + "width": 247, + "height": 65 + }, + { + "x": 1781, + "y": 2632, + "width": 246, + "height": 65 + }, + { + "x": 2030, + "y": 2632, + "width": 246, + "height": 65 + }, + { + "x": 683, + "y": 2700, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2700, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2700, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2700, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2700, + "width": 246, + "height": 66 + }, + { + "x": 683, + "y": 2769, + "width": 311, + "height": 66 + }, + { + "x": 997, + "y": 2769, + "width": 531, + "height": 66 + }, + { + "x": 1531, + "y": 2769, + "width": 247, + "height": 66 + }, + { + "x": 1781, + "y": 2769, + "width": 246, + "height": 66 + }, + { + "x": 2030, + "y": 2769, + "width": 246, + "height": 66 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test2.png b/cv_analysis/test/test_data/test2.png new file mode 100644 index 0000000..96a4d5e Binary files /dev/null and b/cv_analysis/test/test_data/test2.png differ diff --git a/cv_analysis/test/test_data/test3.json b/cv_analysis/test/test_data/test3.json new file mode 100644 index 0000000..b451279 --- /dev/null +++ b/cv_analysis/test/test_data/test3.json @@ -0,0 +1,233 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2481, + "pageHeight": 3506, + "cells": [ + { + "x": 195, + "y": 1519, + "width": 2091, + "height": 84 + }, + { + "x": 195, + "y": 1604, + "width": 583, + "height": 123 + }, + { + "x": 783, + "y": 1605, + "width": 1503, + "height": 124 + }, + { + "x": 195, + "y": 1730, + "width": 583, + "height": 65 + }, + { + "x": 783, + "y": 1731, + "width": 1502, + "height": 66 + }, + { + "x": 195, + "y": 1798, + "width": 583, + "height": 65 + }, + { + "x": 783, + "y": 1799, + "width": 1502, + "height": 66 + }, + { + "x": 195, + "y": 1866, + "width": 583, + "height": 65 + }, + { + "x": 782, + "y": 1867, + "width": 1503, + "height": 66 + }, + { + "x": 195, + "y": 1934, + "width": 583, + "height": 65 + }, + { + "x": 782, + "y": 1935, + "width": 1503, + "height": 66 + }, + { + "x": 194, + "y": 2003, + "width": 584, + "height": 64 + }, + { + "x": 782, + "y": 2003, + "width": 535, + "height": 66 + }, + { + "x": 1321, + "y": 2005, + "width": 455, + "height": 64 + }, + { + "x": 1780, + "y": 2005, + "width": 505, + "height": 65 + }, + { + "x": 193, + "y": 2071, + "width": 585, + "height": 65 + }, + { + "x": 782, + "y": 2071, + "width": 535, + "height": 66 + }, + { + "x": 1321, + "y": 2073, + "width": 455, + "height": 64 + }, + { + "x": 1780, + "y": 2073, + "width": 505, + "height": 65 + }, + { + "x": 193, + "y": 2139, + "width": 585, + "height": 65 + }, + { + "x": 782, + "y": 2140, + "width": 535, + "height": 65 + }, + { + "x": 1321, + "y": 2141, + "width": 455, + "height": 64 + }, + { + "x": 1780, + "y": 2141, + "width": 505, + "height": 65 + }, + { + "x": 193, + "y": 2207, + "width": 585, + "height": 65 + }, + { + "x": 782, + "y": 2208, + "width": 535, + "height": 65 + }, + { + "x": 1321, + "y": 2209, + "width": 455, + "height": 64 + }, + { + "x": 1780, + "y": 2210, + "width": 505, + "height": 64 + }, + { + "x": 193, + "y": 2275, + "width": 585, + "height": 66 + }, + { + "x": 782, + "y": 2276, + "width": 535, + "height": 65 + }, + { + "x": 1321, + "y": 2277, + "width": 455, + "height": 65 + }, + { + "x": 1780, + "y": 2278, + "width": 505, + "height": 65 + }, + { + "x": 193, + "y": 2343, + "width": 584, + "height": 66 + }, + { + "x": 782, + "y": 2344, + "width": 1503, + "height": 67 + }, + { + "x": 193, + "y": 2412, + "width": 584, + "height": 65 + }, + { + "x": 781, + "y": 2413, + "width": 1504, + "height": 66 + }, + { + "x": 193, + "y": 2480, + "width": 584, + "height": 65 + }, + { + "x": 781, + "y": 2481, + "width": 1504, + "height": 66 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test3.png b/cv_analysis/test/test_data/test3.png new file mode 100644 index 0000000..036b455 Binary files /dev/null and b/cv_analysis/test/test_data/test3.png differ diff --git a/cv_analysis/test/test_data/test4.json b/cv_analysis/test/test_data/test4.json new file mode 100644 index 0000000..0ed435f --- /dev/null +++ b/cv_analysis/test/test_data/test4.json @@ -0,0 +1,203 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 3508, + "pageHeight": 2481, + "cells": [ + { + "x": 299, + "y": 761, + "width": 232, + "height": 354 + }, + { + "x": 533, + "y": 761, + "width": 235, + "height": 354 + }, + { + "x": 770, + "y": 761, + "width": 205, + "height": 354 + }, + { + "x": 977, + "y": 761, + "width": 211, + "height": 354 + }, + { + "x": 1190, + "y": 761, + "width": 425, + "height": 138 + }, + { + "x": 1617, + "y": 761, + "width": 195, + "height": 354 + }, + { + "x": 1814, + "y": 761, + "width": 168, + "height": 354 + }, + { + "x": 1984, + "y": 761, + "width": 184, + "height": 354 + }, + { + "x": 2170, + "y": 761, + "width": 191, + "height": 354 + }, + { + "x": 2363, + "y": 761, + "width": 274, + "height": 138 + }, + { + "x": 2639, + "y": 761, + "width": 159, + "height": 354 + }, + { + "x": 2800, + "y": 761, + "width": 466, + "height": 354 + }, + { + "x": 1190, + "y": 901, + "width": 141, + "height": 214 + }, + { + "x": 1333, + "y": 901, + "width": 123, + "height": 214 + }, + { + "x": 1458, + "y": 901, + "width": 157, + "height": 214 + }, + { + "x": 2363, + "y": 901, + "width": 130, + "height": 214 + }, + { + "x": 2495, + "y": 901, + "width": 142, + "height": 214 + }, + { + "x": 299, + "y": 1121, + "width": 232, + "height": 581 + }, + { + "x": 533, + "y": 1121, + "width": 235, + "height": 581 + }, + { + "x": 770, + "y": 1121, + "width": 205, + "height": 581 + }, + { + "x": 977, + "y": 1121, + "width": 211, + "height": 581 + }, + { + "x": 1190, + "y": 1121, + "width": 141, + "height": 581 + }, + { + "x": 1333, + "y": 1121, + "width": 123, + "height": 581 + }, + { + "x": 1458, + "y": 1121, + "width": 157, + "height": 581 + }, + { + "x": 1617, + "y": 1121, + "width": 195, + "height": 581 + }, + { + "x": 1814, + "y": 1121, + "width": 168, + "height": 581 + }, + { + "x": 1984, + "y": 1121, + "width": 184, + "height": 581 + }, + { + "x": 2170, + "y": 1121, + "width": 191, + "height": 581 + }, + { + "x": 2363, + "y": 1121, + "width": 130, + "height": 581 + }, + { + "x": 2495, + "y": 1121, + "width": 142, + "height": 581 + }, + { + "x": 2639, + "y": 1121, + "width": 159, + "height": 581 + }, + { + "x": 2800, + "y": 1121, + "width": 466, + "height": 581 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test4.png b/cv_analysis/test/test_data/test4.png new file mode 100644 index 0000000..181a372 Binary files /dev/null and b/cv_analysis/test/test_data/test4.png differ diff --git a/cv_analysis/test/test_data/test5.json b/cv_analysis/test/test_data/test5.json new file mode 100644 index 0000000..b2d49bf --- /dev/null +++ b/cv_analysis/test/test_data/test5.json @@ -0,0 +1,563 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2481, + "pageHeight": 3508, + "cells": [ + { + "x": 299, + "y": 706, + "width": 154, + "height": 411 + }, + { + "x": 455, + "y": 706, + "width": 239, + "height": 411 + }, + { + "x": 696, + "y": 706, + "width": 210, + "height": 411 + }, + { + "x": 908, + "y": 706, + "width": 818, + "height": 56 + }, + { + "x": 1728, + "y": 706, + "width": 376, + "height": 56 + }, + { + "x": 2106, + "y": 706, + "width": 138, + "height": 411 + }, + { + "x": 908, + "y": 764, + "width": 200, + "height": 353 + }, + { + "x": 1110, + "y": 764, + "width": 214, + "height": 353 + }, + { + "x": 1326, + "y": 764, + "width": 167, + "height": 353 + }, + { + "x": 1495, + "y": 764, + "width": 231, + "height": 353 + }, + { + "x": 1728, + "y": 764, + "width": 230, + "height": 353 + }, + { + "x": 1960, + "y": 764, + "width": 144, + "height": 353 + }, + { + "x": 299, + "y": 1123, + "width": 154, + "height": 283 + }, + { + "x": 455, + "y": 1123, + "width": 239, + "height": 283 + }, + { + "x": 696, + "y": 1123, + "width": 210, + "height": 283 + }, + { + "x": 908, + "y": 1123, + "width": 200, + "height": 283 + }, + { + "x": 1110, + "y": 1123, + "width": 214, + "height": 283 + }, + { + "x": 1326, + "y": 1123, + "width": 167, + "height": 283 + }, + { + "x": 1495, + "y": 1123, + "width": 231, + "height": 283 + }, + { + "x": 1728, + "y": 1123, + "width": 230, + "height": 283 + }, + { + "x": 1960, + "y": 1123, + "width": 144, + "height": 283 + }, + { + "x": 2106, + "y": 1123, + "width": 138, + "height": 283 + }, + { + "x": 299, + "y": 1408, + "width": 154, + "height": 284 + }, + { + "x": 455, + "y": 1408, + "width": 239, + "height": 284 + }, + { + "x": 696, + "y": 1408, + "width": 210, + "height": 284 + }, + { + "x": 908, + "y": 1408, + "width": 200, + "height": 284 + }, + { + "x": 1110, + "y": 1408, + "width": 214, + "height": 284 + }, + { + "x": 1326, + "y": 1408, + "width": 167, + "height": 284 + }, + { + "x": 1495, + "y": 1408, + "width": 231, + "height": 284 + }, + { + "x": 1728, + "y": 1408, + "width": 230, + "height": 284 + }, + { + "x": 1960, + "y": 1408, + "width": 144, + "height": 284 + }, + { + "x": 2106, + "y": 1408, + "width": 138, + "height": 284 + }, + { + "x": 299, + "y": 2090, + "width": 169, + "height": 211 + }, + { + "x": 470, + "y": 2090, + "width": 253, + "height": 211 + }, + { + "x": 725, + "y": 2090, + "width": 229, + "height": 211 + }, + { + "x": 956, + "y": 2090, + "width": 516, + "height": 57 + }, + { + "x": 1474, + "y": 2090, + "width": 496, + "height": 57 + }, + { + "x": 1972, + "y": 2090, + "width": 272, + "height": 211 + }, + { + "x": 956, + "y": 2149, + "width": 184, + "height": 152 + }, + { + "x": 1142, + "y": 2149, + "width": 158, + "height": 152 + }, + { + "x": 1302, + "y": 2149, + "width": 170, + "height": 152 + }, + { + "x": 1474, + "y": 2149, + "width": 262, + "height": 152 + }, + { + "x": 1738, + "y": 2149, + "width": 232, + "height": 152 + }, + { + "x": 299, + "y": 2303, + "width": 169, + "height": 56 + }, + { + "x": 470, + "y": 2303, + "width": 253, + "height": 56 + }, + { + "x": 725, + "y": 2303, + "width": 229, + "height": 56 + }, + { + "x": 956, + "y": 2303, + "width": 184, + "height": 56 + }, + { + "x": 1142, + "y": 2303, + "width": 158, + "height": 56 + }, + { + "x": 1302, + "y": 2303, + "width": 170, + "height": 56 + }, + { + "x": 1474, + "y": 2303, + "width": 262, + "height": 56 + }, + { + "x": 1738, + "y": 2303, + "width": 232, + "height": 56 + }, + { + "x": 1972, + "y": 2303, + "width": 272, + "height": 56 + }, + { + "x": 299, + "y": 2361, + "width": 169, + "height": 204 + }, + { + "x": 470, + "y": 2361, + "width": 253, + "height": 204 + }, + { + "x": 725, + "y": 2361, + "width": 229, + "height": 97 + }, + { + "x": 956, + "y": 2361, + "width": 184, + "height": 97 + }, + { + "x": 1142, + "y": 2361, + "width": 158, + "height": 204 + }, + { + "x": 1302, + "y": 2361, + "width": 170, + "height": 204 + }, + { + "x": 1474, + "y": 2361, + "width": 262, + "height": 97 + }, + { + "x": 1738, + "y": 2361, + "width": 232, + "height": 97 + }, + { + "x": 1972, + "y": 2361, + "width": 272, + "height": 204 + }, + { + "x": 725, + "y": 2460, + "width": 229, + "height": 105 + }, + { + "x": 956, + "y": 2460, + "width": 184, + "height": 105 + }, + { + "x": 1474, + "y": 2460, + "width": 262, + "height": 105 + }, + { + "x": 1738, + "y": 2460, + "width": 232, + "height": 105 + }, + { + "x": 299, + "y": 2567, + "width": 169, + "height": 205 + }, + { + "x": 470, + "y": 2567, + "width": 253, + "height": 205 + }, + { + "x": 725, + "y": 2567, + "width": 229, + "height": 205 + }, + { + "x": 956, + "y": 2567, + "width": 184, + "height": 205 + }, + { + "x": 1142, + "y": 2567, + "width": 158, + "height": 205 + }, + { + "x": 1302, + "y": 2567, + "width": 170, + "height": 205 + }, + { + "x": 1474, + "y": 2567, + "width": 262, + "height": 205 + }, + { + "x": 1738, + "y": 2567, + "width": 232, + "height": 205 + }, + { + "x": 1972, + "y": 2567, + "width": 272, + "height": 205 + }, + { + "x": 299, + "y": 2774, + "width": 169, + "height": 56 + }, + { + "x": 470, + "y": 2774, + "width": 253, + "height": 56 + }, + { + "x": 725, + "y": 2774, + "width": 229, + "height": 56 + }, + { + "x": 956, + "y": 2774, + "width": 184, + "height": 56 + }, + { + "x": 1142, + "y": 2774, + "width": 158, + "height": 56 + }, + { + "x": 1302, + "y": 2774, + "width": 170, + "height": 56 + }, + { + "x": 1474, + "y": 2774, + "width": 262, + "height": 56 + }, + { + "x": 1738, + "y": 2774, + "width": 232, + "height": 56 + }, + { + "x": 1972, + "y": 2774, + "width": 272, + "height": 56 + }, + { + "x": 299, + "y": 2832, + "width": 169, + "height": 205 + }, + { + "x": 470, + "y": 2832, + "width": 253, + "height": 205 + }, + { + "x": 725, + "y": 2832, + "width": 229, + "height": 205 + }, + { + "x": 956, + "y": 2832, + "width": 184, + "height": 205 + }, + { + "x": 1142, + "y": 2832, + "width": 158, + "height": 205 + }, + { + "x": 1302, + "y": 2832, + "width": 170, + "height": 205 + }, + { + "x": 1474, + "y": 2832, + "width": 262, + "height": 205 + }, + { + "x": 1738, + "y": 2832, + "width": 232, + "height": 205 + }, + { + "x": 1972, + "y": 2832, + "width": 272, + "height": 205 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test5.png b/cv_analysis/test/test_data/test5.png new file mode 100644 index 0000000..e678300 Binary files /dev/null and b/cv_analysis/test/test_data/test5.png differ diff --git a/cv_analysis/test/test_data/test6.json b/cv_analysis/test/test_data/test6.json new file mode 100644 index 0000000..ec866c9 --- /dev/null +++ b/cv_analysis/test/test_data/test6.json @@ -0,0 +1,53 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2480, + "pageHeight": 3509, + "cells": [ + { + "x": 494, + "y": 960, + "width": 562, + "height": 453 + }, + { + "x": 1059, + "y": 962, + "width": 1065, + "height": 224 + }, + { + "x": 1060, + "y": 1190, + "width": 1066, + "height": 225 + }, + { + "x": 500, + "y": 1419, + "width": 559, + "height": 521 + }, + { + "x": 1060, + "y": 1420, + "width": 1070, + "height": 521 + }, + { + "x": 506, + "y": 1945, + "width": 553, + "height": 222 + }, + { + "x": 1063, + "y": 1946, + "width": 1070, + "height": 223 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test6.png b/cv_analysis/test/test_data/test6.png new file mode 100644 index 0000000..1461f7e Binary files /dev/null and b/cv_analysis/test/test_data/test6.png differ diff --git a/cv_analysis/test/test_data/test7.json b/cv_analysis/test/test_data/test7.json new file mode 100644 index 0000000..b007ce2 --- /dev/null +++ b/cv_analysis/test/test_data/test7.json @@ -0,0 +1,29 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2471, + "pageHeight": 3505, + "cells": [ + { + "x": 572, + "y": 725, + "width": 451, + "height": 1785 + }, + { + "x": 1025, + "y": 725, + "width": 505, + "height": 1785 + }, + { + "x": 1533, + "y": 724, + "width": 454, + "height": 1786 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test7.png b/cv_analysis/test/test_data/test7.png new file mode 100644 index 0000000..7a7b5b6 Binary files /dev/null and b/cv_analysis/test/test_data/test7.png differ diff --git a/cv_analysis/test/test_data/test8.json b/cv_analysis/test/test_data/test8.json new file mode 100644 index 0000000..ed6e7c5 --- /dev/null +++ b/cv_analysis/test/test_data/test8.json @@ -0,0 +1,179 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2471, + "pageHeight": 3505, + "cells": [ + { + "x": 1292, + "y": 1009, + "width": 264, + "height": 132 + }, + { + "x": 538, + "y": 1015, + "width": 254, + "height": 124 + }, + { + "x": 797, + "y": 1016, + "width": 253, + "height": 124 + }, + { + "x": 1056, + "y": 1017, + "width": 230, + "height": 123 + }, + { + "x": 538, + "y": 1144, + "width": 253, + "height": 81 + }, + { + "x": 797, + "y": 1145, + "width": 253, + "height": 82 + }, + { + "x": 1056, + "y": 1146, + "width": 230, + "height": 81 + }, + { + "x": 1292, + "y": 1146, + "width": 257, + "height": 81 + }, + { + "x": 538, + "y": 1231, + "width": 253, + "height": 39 + }, + { + "x": 797, + "y": 1232, + "width": 253, + "height": 39 + }, + { + "x": 1055, + "y": 1233, + "width": 230, + "height": 38 + }, + { + "x": 1291, + "y": 1233, + "width": 258, + "height": 38 + }, + { + "x": 538, + "y": 1277, + "width": 253, + "height": 80 + }, + { + "x": 797, + "y": 1277, + "width": 252, + "height": 80 + }, + { + "x": 1055, + "y": 1278, + "width": 230, + "height": 80 + }, + { + "x": 1291, + "y": 1278, + "width": 258, + "height": 80 + }, + { + "x": 538, + "y": 1362, + "width": 253, + "height": 40 + }, + { + "x": 797, + "y": 1363, + "width": 253, + "height": 40 + }, + { + "x": 1055, + "y": 1363, + "width": 230, + "height": 40 + }, + { + "x": 1291, + "y": 1363, + "width": 258, + "height": 40 + }, + { + "x": 538, + "y": 1407, + "width": 253, + "height": 82 + }, + { + "x": 797, + "y": 1408, + "width": 253, + "height": 81 + }, + { + "x": 1055, + "y": 1408, + "width": 231, + "height": 82 + }, + { + "x": 1291, + "y": 1409, + "width": 258, + "height": 81 + }, + { + "x": 538, + "y": 1494, + "width": 254, + "height": 209 + }, + { + "x": 797, + "y": 1494, + "width": 253, + "height": 209 + }, + { + "x": 1055, + "y": 1495, + "width": 231, + "height": 209 + }, + { + "x": 1291, + "y": 1495, + "width": 265, + "height": 214 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test8.png b/cv_analysis/test/test_data/test8.png new file mode 100644 index 0000000..2ba4f3c Binary files /dev/null and b/cv_analysis/test/test_data/test8.png differ diff --git a/cv_analysis/test/test_data/test9.json b/cv_analysis/test/test_data/test9.json new file mode 100644 index 0000000..3b0dc49 --- /dev/null +++ b/cv_analysis/test/test_data/test9.json @@ -0,0 +1,335 @@ +{ + "pages": [ + { + "page": 0, + "pageWidth": 2481, + "pageHeight": 3506, + "cells": [ + { + "x": 1913, + "y": 602, + "width": 56, + "height": 670 + }, + { + "x": 1973, + "y": 602, + "width": 56, + "height": 670 + }, + { + "x": 2032, + "y": 603, + "width": 57, + "height": 669 + }, + { + "x": 605, + "y": 647, + "width": 389, + "height": 430 + }, + { + "x": 997, + "y": 646, + "width": 210, + "height": 432 + }, + { + "x": 1211, + "y": 647, + "width": 210, + "height": 432 + }, + { + "x": 1425, + "y": 647, + "width": 66, + "height": 432 + }, + { + "x": 1496, + "y": 647, + "width": 65, + "height": 432 + }, + { + "x": 605, + "y": 1083, + "width": 388, + "height": 350 + }, + { + "x": 997, + "y": 1083, + "width": 209, + "height": 352 + }, + { + "x": 1210, + "y": 1083, + "width": 211, + "height": 352 + }, + { + "x": 1424, + "y": 1083, + "width": 67, + "height": 352 + }, + { + "x": 1495, + "y": 1083, + "width": 66, + "height": 352 + }, + { + "x": 1912, + "y": 1275, + "width": 57, + "height": 669 + }, + { + "x": 1972, + "y": 1275, + "width": 57, + "height": 670 + }, + { + "x": 2032, + "y": 1276, + "width": 57, + "height": 669 + }, + { + "x": 604, + "y": 1439, + "width": 389, + "height": 498 + }, + { + "x": 996, + "y": 1439, + "width": 67, + "height": 499 + }, + { + "x": 1067, + "y": 1439, + "width": 67, + "height": 499 + }, + { + "x": 1138, + "y": 1439, + "width": 68, + "height": 500 + }, + { + "x": 1210, + "y": 1439, + "width": 67, + "height": 500 + }, + { + "x": 1280, + "y": 1439, + "width": 67, + "height": 500 + }, + { + "x": 1351, + "y": 1439, + "width": 69, + "height": 500 + }, + { + "x": 1424, + "y": 1439, + "width": 67, + "height": 500 + }, + { + "x": 1495, + "y": 1439, + "width": 65, + "height": 500 + }, + { + "x": 603, + "y": 1943, + "width": 389, + "height": 291 + }, + { + "x": 996, + "y": 1943, + "width": 209, + "height": 292 + }, + { + "x": 1209, + "y": 1943, + "width": 210, + "height": 292 + }, + { + "x": 1424, + "y": 1943, + "width": 67, + "height": 292 + }, + { + "x": 1494, + "y": 1943, + "width": 66, + "height": 293 + }, + { + "x": 1911, + "y": 1948, + "width": 56, + "height": 669 + }, + { + "x": 1971, + "y": 1948, + "width": 56, + "height": 669 + }, + { + "x": 2030, + "y": 1949, + "width": 57, + "height": 669 + }, + { + "x": 603, + "y": 2239, + "width": 388, + "height": 304 + }, + { + "x": 995, + "y": 2239, + "width": 67, + "height": 305 + }, + { + "x": 1066, + "y": 2239, + "width": 67, + "height": 306 + }, + { + "x": 1137, + "y": 2239, + "width": 68, + "height": 306 + }, + { + "x": 1209, + "y": 2239, + "width": 66, + "height": 306 + }, + { + "x": 1280, + "y": 2240, + "width": 67, + "height": 305 + }, + { + "x": 1351, + "y": 2240, + "width": 68, + "height": 305 + }, + { + "x": 1423, + "y": 2240, + "width": 67, + "height": 305 + }, + { + "x": 1494, + "y": 2240, + "width": 65, + "height": 305 + }, + { + "x": 601, + "y": 2548, + "width": 390, + "height": 783 + }, + { + "x": 995, + "y": 2548, + "width": 66, + "height": 783 + }, + { + "x": 1065, + "y": 2548, + "width": 68, + "height": 783 + }, + { + "x": 1137, + "y": 2549, + "width": 68, + "height": 782 + }, + { + "x": 1209, + "y": 2549, + "width": 66, + "height": 782 + }, + { + "x": 1279, + "y": 2549, + "width": 67, + "height": 782 + }, + { + "x": 1350, + "y": 2549, + "width": 69, + "height": 782 + }, + { + "x": 1423, + "y": 2549, + "width": 67, + "height": 782 + }, + { + "x": 1493, + "y": 2549, + "width": 66, + "height": 782 + }, + { + "x": 1910, + "y": 2622, + "width": 57, + "height": 666 + }, + { + "x": 1970, + "y": 2622, + "width": 57, + "height": 667 + }, + { + "x": 2030, + "y": 2622, + "width": 56, + "height": 667 + } + ] + } + ] +} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test9.png b/cv_analysis/test/test_data/test9.png new file mode 100644 index 0000000..1c6d1b3 Binary files /dev/null and b/cv_analysis/test/test_data/test9.png differ diff --git a/cv_analysis/test/unit_tests/table_test.py b/cv_analysis/test/unit_tests/table_test.py index ee60000..61248df 100644 --- a/cv_analysis/test/unit_tests/table_test.py +++ b/cv_analysis/test/unit_tests/table_test.py @@ -1,7 +1,7 @@ from os.path import join import json -from cv_analysis.table_parsing import parse_table +from cv_analysis.table_parsing import parse_tables from cv_analysis.locations import TEST_DATA_DIR from cv_analysis.test.config import TEST_CONFIG from cv_analysis.utils.test_metrics import compute_document_score @@ -9,13 +9,37 @@ from cv_analysis.utils.preprocessing import open_pdf def test_table_parsing(): + for i in range(1, 11): + + img_path = join(TEST_DATA_DIR, f"test{i}.png") + json_path = join(TEST_DATA_DIR, f"test{i}.json") + pages = open_pdf(img_path) + + result = {"pages": []} + for i, page in enumerate(pages): + result["pages"].append({"page": str(i), "cells": [x.json_xywh() for x in parse_tables(page)]}) + with open(json_path) as f: + annotation = json.load(f) + + score = compute_document_score(result, annotation) + + assert round(score, 3) >= TEST_CONFIG.table_score_threshold + + +""" +def test_table_parsing(): + img_path = join(TEST_DATA_DIR, "table.jpg") json_path = join(TEST_DATA_DIR, "table.json") - pages = open_pdf(img_path)[0] - result = {} + pages = open_pdf(img_path) + + result = {"pages": []} for i, page in enumerate(pages): - result.update({str(i): parse_table(page)}) + result["pages"].append({"page": str(i), "cells": [x.xywh() for x in parse_tables(page)]}) with open(json_path) as f: annotation = json.load(f) + score = compute_document_score(result, annotation) + assert score >= TEST_CONFIG.table_score_threshold +""" diff --git a/cv_analysis/utils/draw.py b/cv_analysis/utils/draw.py index 0031f62..96d0b3f 100644 --- a/cv_analysis/utils/draw.py +++ b/cv_analysis/utils/draw.py @@ -3,7 +3,7 @@ import cv2 from cv_analysis.utils import copy_and_normalize_channels -def draw_contours(image, contours): +def draw_contours(image, contours, color=None, annotate=False): image = copy_and_normalize_channels(image) @@ -25,7 +25,6 @@ def draw_rectangles(image, rectangles, color=None, annotate=False): for rect in rectangles: x, y, w, h = rect cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) - if annotate: annotate_rect(x, y, w, h) diff --git a/cv_analysis/utils/preprocessing.py b/cv_analysis/utils/preprocessing.py index 70bab5e..456f4a5 100644 --- a/cv_analysis/utils/preprocessing.py +++ b/cv_analysis/utils/preprocessing.py @@ -3,29 +3,29 @@ import pdf2image from PIL import Image import cv2 -from cv_analysis.utils.deskew import deskew - def preprocess_pdf_image(page): if len(page.shape) > 2: page = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY) page = cv2.fastNlMeansDenoising(page, h=3) - return deskew(page) + return page def open_pdf(pdf, first_page=0, last_page=None): + first_page += 1 last_page = None if last_page is None else last_page + 1 + if type(pdf) == str: - if pdf.endswith(".jpg") or pdf.endswith(".png"): + if pdf.lower().endswith((".png", ".jpg", ".jpeg")): pages = [Image.open(pdf)] - # assume pdf as default file type for a path argument - else: + else: # assume pdf as default file type for a path argument pages = pdf2image.convert_from_path(pdf, first_page=first_page, last_page=last_page) elif type(pdf) == bytes: pages = pdf2image.convert_from_bytes(pdf, first_page=first_page, last_page=last_page) elif type(pdf) in {list, ndarray}: return pdf + pages = [preprocess_pdf_image(array(p)) for p in pages] - pages, angles = list(zip(*pages)) - return pages, angles + + return pages diff --git a/cv_analysis/utils/structures.py b/cv_analysis/utils/structures.py new file mode 100644 index 0000000..ed7a6c2 --- /dev/null +++ b/cv_analysis/utils/structures.py @@ -0,0 +1,70 @@ +from json import dumps + +class Rectangle: + def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh"): + try: + self.x1 = x1 + self.y1 = y1 + self.w = w if w else x2 - x1 + self.h = h if h else y2 - y1 + self.x2 = x2 if x2 else x1 + w + self.y2 = y2 if y2 else y1 + h + assert (self.x1 + self.w) == self.x2 + assert (self.y1 + self.h) == self.y2 + self.indent = indent + self.format = format + except: + raise Exception("x1, y1, (w|x2), and (h|y2) must be defined.") + + def json_xywh(self): + return {"x": self.x1, "y": self.y1, "width": self.w, "height": self.h} + + def json_xyxy(self): + return {"x1": self.x1, "y1": self.y1, "x2": self.x2, "y2": self.y2} + + def json_full(self): + return {"x1": self.x1, "y1": self.y1, "x2": self.x2, "y2": self.y2, "width": self.w, "height": self.h} + + def json(self): + json_func = {"xywh": self.json_xywh, "xyxy": self.json_xyxy}.get(self.format, self.json_full) + return json_func() + + def xyxy(self): + return self.x1, self.y1, self.x2, self.y2 + + def xywh(self): + return self.x1, self.y1, self.w, self.h + + @classmethod + def from_xyxy(cls, xyxy_tuple): + x1, y1, x2, y2 = xyxy_tuple + return cls(x1=x1, y1=y1, x2=x2, y2=y2) + + @classmethod + def from_xywh(cls, xywh_tuple): + x, y, w, h = xywh_tuple + return cls(x1=x, y1=y, w=w, h=h) + + def __str__(self): + return dumps(self.json(), indent=self.indent) + + def __repr__(self): + return str(self.json()) + + def __iter__(self): + return list(self.json().values()).__iter__() + + + + +""" +boxes = [[30,40,5,6],[56,78,23,19],[5,100,45,35],[34,34,67,67]] +rectangles = list(map(Rectangle.from_xywh, boxes)) +rectangles +r = rectangles[1] +""" + +class Contour: + def __init__(self): + pass + diff --git a/cv_analysis/utils/test_metrics.py b/cv_analysis/utils/test_metrics.py index 2246042..217530c 100644 --- a/cv_analysis/utils/test_metrics.py +++ b/cv_analysis/utils/test_metrics.py @@ -1,12 +1,41 @@ import numpy as np +from cv_analysis.utils.structures import Rectangle -def compute_iou_from_boxes(box1, box2): +def xyxy_from_object(box_object): + try: + x1, y1, x2, y2 = box_object.xyxy() + except: + try: + x1 = box_object["x"] + y1 = box_object["y"] + x2 = x1 + box_object["width"] + y2 = y1 + box_object["height"] + except: + x1, y1, x2, y2 = box_object + return x1, y1, x2, y2 + + +def xywh_from_object(box_object): + try: + x, y, w, h = box_object.xywh() + except: + try: + x = box_object["x"] + y = box_object["y"] + w = box_object["width"] + h = box_object["height"] + except: + x, y, w, h = box_object + return x, y, w, h + + +def compute_iou_from_boxes(box1: Rectangle, box2: list): """ Each box of the form (x1, y1, delx, dely) """ - ax1, ay1, aw, ah = box1 - bx1, by1, bw, bh = box2 + ax1, ay1, aw, ah = xywh_from_object(box1) + bx1, by1, bw, bh = xywh_from_object(box2) ax2, ay2, bx2, by2 = ax1 + aw, ay1 + ah, bx1 + bw, by1 + bh if (ax1 > bx2) or (bx1 > ax2) or (ay1 > by2) or (by1 > ay2): return 0 @@ -40,15 +69,15 @@ def compute_page_iou(results_box_list, gt_box_list): def compute_document_score(results_dict, annotation_dict): - page_weights = np.array([len(v) for v in annotation_dict.values()]) + + page_weights = np.array([len(page["cells"]) for page in annotation_dict["pages"]]) page_weights = page_weights / sum(page_weights) + scores = [] - for key in annotation_dict: - scores.append(compute_page_iou(results_dict[key], annotation_dict[key])) + for i in range(len(annotation_dict["pages"])): + scores.append(compute_page_iou(results_dict["pages"][i]["cells"], annotation_dict["pages"][i]["cells"])) scores = np.array(scores) + doc_score = np.average(scores, weights=page_weights) + return doc_score - - -def compute_document_score_tables(results_dict, annotation_dict): - pass diff --git a/cv_analysis/utils/visual_logging.py b/cv_analysis/utils/visual_logging.py index 6afbd57..cbb5074 100644 --- a/cv_analysis/utils/visual_logging.py +++ b/cv_analysis/utils/visual_logging.py @@ -10,13 +10,30 @@ class VisualLogger: if not os.path.exists(self.output_folder): os.mkdir(self.output_folder) - def debug(self, img, name): - if self.level_is_debug(): - output_path = os.path.join(self.output_folder, name) - save_mpl(img, output_path) + def _save(self, img, name): + output_path = os.path.join(self.output_folder, name) + save_mpl(img, output_path) - def level_is_debug(self): - return self.level == "DEBUG" + def info(self, img, name): + if self._level_is_info(): + self._save(img, name) + + def debug(self, img, name): + if self._level_is_debug(): + self._save(img, name) + + def all(self, img, name): + if self._level_is_debug(): + self._save(img, name) + + def _level_is_info(self): + return self.level in {"INFO", "DEBUG", "ALL"} + + def _level_is_debug(self): + return self.level in {"DEBUG", "ALL"} + + def _level_is_all(self): + return self.level == "ALL" vizlogger = VisualLogger(CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder) diff --git a/scripts/annotate.py b/scripts/annotate.py index 002e245..941722c 100644 --- a/scripts/annotate.py +++ b/scripts/annotate.py @@ -1,30 +1,48 @@ +""" +Usage: +python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type table --show +python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type redaction --show +python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type layout --show +python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type figure --show +""" + import argparse -from cv_analysis.table_parsing import annotate_tables_in_pdf -from cv_analysis.redaction_detection import annotate_redactions_in_pdf -from cv_analysis.layout_parsing import annotate_layout_in_pdf -from cv_analysis.figure_detection import detect_figures_in_pdf +from cv_analysis.utils.display import show_mpl +from cv_analysis.utils.draw import draw_contours, draw_rectangles +from cv_analysis.utils.preprocessing import open_pdf +from cv_analysis.utils.visual_logging import vizlogger def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("pdf_path") - parser.add_argument("page_index", type=int) - parser.add_argument("--type", choices=["table", "redaction", "layout", "figure", "figures"]) + parser.add_argument("--page_index", type=int, default=0) + parser.add_argument("--type", choices=["table", "redaction", "layout", "figure"], default="table") parser.add_argument("--show", action="store_true", default=False) - args = parser.parse_args() - return args +def annotate_page(page_image, analysis_function, drawing_function, name="tmp.png", show=True): + result = analysis_function(page_image) + page_image = drawing_function(page_image, result) + vizlogger.debug(page_image, "redactions05_output.png") + show_mpl(page_image) + + if __name__ == "__main__": args = parse_args() + page = open_pdf(args.pdf_path, first_page=args.page_index, last_page=args.page_index)[0] + name = f"{args.type}_final_result.png" + draw = draw_rectangles if args.type == "table": - annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show) + from cv_analysis.table_parsing import parse_tables as analyze elif args.type == "redaction": - annotate_redactions_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show) + from cv_analysis.redaction_detection import find_redactions as analyze + draw = draw_contours elif args.type == "layout": - annotate_layout_in_pdf(args.pdf_path, page_index=args.page_index, show=args.show) + from cv_analysis.layout_parsing import parse_layout as analyze elif args.type == "figure": - detect_figures_in_pdf(args.pdf_path, page_index=args.page_index, show=True) + from cv_analysis.figure_detection import detect_figures as analyze + annotate_page(page, analyze, draw, name=name, show=args.show) diff --git a/scripts/deskew_demo.py b/scripts/deskew_demo.py index 4a4032c..b09a342 100644 --- a/scripts/deskew_demo.py +++ b/scripts/deskew_demo.py @@ -7,7 +7,7 @@ from PIL import Image from cv_analysis.utils.deskew import deskew_histbased # , deskew_linebased from cv_analysis.utils.display import show_mpl from cv_analysis.utils.draw import draw_stats -from cv_analysis.table_parsing import parse_table +from cv_analysis.table_parsing import parse_tables def parse_args(): @@ -37,10 +37,10 @@ if __name__ == "__main__": page_corr_ = Image.fromarray(page_corr).convert("RGB") page_corr_.save(args.save_path.replace(".pdf", "_corrected.pdf")) # annotate_tables_in_pdf(args.pdf_path, page_index=args.page_index) - stats = parse_table(page) + stats = parse_tables(page) page = draw_stats(page, stats) show_mpl(page) - stats_corr = parse_table(page_corr) + stats_corr = parse_tables(page_corr) page_corr = draw_stats(page_corr, stats_corr) show_mpl(page_corr) if args.save_path: diff --git a/cv_analysis/test/scripts/export_example_pages.py b/scripts/export_example_pages.py similarity index 100% rename from cv_analysis/test/scripts/export_example_pages.py rename to scripts/export_example_pages.py diff --git a/src/run_service.py b/src/run_service.py index 86454eb..f063ed1 100644 --- a/src/run_service.py +++ b/src/run_service.py @@ -8,12 +8,14 @@ from prometheus_flask_exporter import PrometheusMetrics from waitress import serve from cv_analysis.utils import npconvert -from cv_analysis.table_parsing import parse_table +from cv_analysis.table_parsing import parse_tables from cv_analysis.redaction_detection import find_redactions from cv_analysis.layout_parsing import parse_layout from cv_analysis.figure_detection import detect_figures from cv_analysis.utils.logging import logger +from cv_analysis.utils.post_processing import Rectangle from cv_analysis.utils.preprocessing import open_pdf +from cv_analysis.utils.structures import Rectangle from cv_analysis.config import CONFIG @@ -42,7 +44,7 @@ def main(): @metrics.summary("tables_request_time_seconds", "Time spent processing tables request") def get_tables(): start_monitoring() - tables = annotate(parse_table) + tables = annotate(parse_tables) return tables @app.route("/redactions", methods=["POST"]) @@ -86,7 +88,7 @@ def make_annotations(pdf, annotation_function): results = [] for i, page in enumerate(pdf): boxes = annotation_function(page) - cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes] + cells = list(map(lambda x: x.json_xywh(), boxes)) results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells}) output_dict = {"pages": results} return jsonify(json.dumps(output_dict, default=npconvert)) @@ -101,10 +103,8 @@ def annotate(annotation_function): data = request.data logger.info(f"Received data.") logger.info(f"Processing data.") - pdf, angles = open_pdf(data) + pdf = open_pdf(data) annotations = make_annotations(pdf, annotation_function) - # if CONFIG.deskew.function != "identity": - # annotations.update({"deskew_angles": angles}) return annotations try: