diff --git a/cv_analysis/fig_detection_with_layout.py b/cv_analysis/fig_detection_with_layout.py index 921c256..a9226f9 100644 --- a/cv_analysis/fig_detection_with_layout.py +++ b/cv_analysis/fig_detection_with_layout.py @@ -4,6 +4,9 @@ from cv_analysis.table_parsing import tables_in_image, parse_table from cv_analysis.utils.text import find_primary_text_regions, remove_primary_text_regions from cv_analysis.utils.draw import draw_rectangles from cv_analysis.utils.display import show_mpl +from cv_analysis.utils.visual_logging import vizlogger +from PIL import Image + def cut_out_content_structures(layout_rects, page): @@ -11,7 +14,7 @@ def cut_out_content_structures(layout_rects, page): small_rects = [] for x, y, w, h in layout_rects: rect = (x, y, w, h) - if w * h >= 50000: + if w * h >= 75000: cropped_page = page[y:(y + h), x:(x + w)] large_rects.append([rect, cropped_page]) else: @@ -22,22 +25,18 @@ def cut_out_content_structures(layout_rects, page): def parse_content_structures(page, large_rects, small_rects): for coordinates, cropped_image in large_rects: - non_text_rects = detect_figures(cropped_image) - - if len(non_text_rects) == 0: + figure_rects = detect_figures(cropped_image) + if len(figure_rects) == 0: # text page = draw_rectangles(page, [coordinates], color=(0, 255, 0), annotate=True) - - elif tables_in_image(cropped_image)[0]: - page = draw_rectangles(page, [coordinates], color=(255, 0, 0), annotate=True) + elif tables_in_image(cropped_image)[0]: # table stats = parse_table(page) - page = draw_rectangles(page, stats, annotate=True) - - else: + page = draw_rectangles(page, stats, color=(255, 0, 0), annotate=True) + else: # figure page = draw_rectangles(page, [coordinates], color=(0, 0, 255), annotate=True) # for coordinates, cropped_image in small_rects: - # non_text_rects = detect_figures(cropped_image) - # if len(non_text_rects) == 0 and len(list(find_primary_text_regions(cropped_image))) > 0: + # figure_rects = detect_figures(cropped_image) + # if len(figure_rects) == 0 and len(list(find_primary_text_regions(cropped_image))) > 0: # page = draw_rectangles(page, [coordinates], color=(0, 255, 0), annotate=True) # else: # page = draw_rectangles(page, [coordinates], color=(0, 255, 255), annotate=True) @@ -48,9 +47,24 @@ def detect_figures_with_layout_parsing(pdf_path, page_index=1, show=False): layout_rects, page = annotate_layout_in_pdf(pdf_path, page_index, return_rects=True) big_structures, small_structures = cut_out_content_structures(layout_rects, page) page = parse_content_structures(page, big_structures, small_structures) - + vizlogger.debug(page, "figures03_final.png") if show: show_mpl(page) else: return page +# pages = [] +# for i in range(0,16): +# pdf_path = "/home/lillian/ocr_docs/Report on spectra.pdf" +# page_index = i +# layout_rects, page = annotate_layout_in_pdf(pdf_path, page_index, return_rects=True) +# big_structures, small_structures = cut_out_content_structures(layout_rects, page) +# page = parse_content_structures(page, big_structures, small_structures) +# pages.append(Image.fromarray(page)) +# p1, p = pages[0], pages[1:] +# +# out_pdf_path = "/home/lillian/ocr_docs/out1.pdf" +# +# p1.save( +# out_pdf_path, "PDF", resolution=150.0, save_all=True, append_images=p +# ) diff --git a/cv_analysis/figure_detection.py b/cv_analysis/figure_detection.py index 72f3543..e1a67b8 100644 --- a/cv_analysis/figure_detection.py +++ b/cv_analysis/figure_detection.py @@ -1,7 +1,6 @@ import cv2 import numpy as np from pdf2image import pdf2image -from PIL import Image from cv_analysis.utils.detection import detect_large_coherent_structures from cv_analysis.utils.display import show_mpl @@ -43,16 +42,3 @@ def detect_figures_in_pdf(pdf_path, page_index=1, show=False): if show: show_mpl(page) -# pages = [] -# for i in range(0,16): -# pdf_path = "/home/lillian/ocr_docs/Report on spectra.pdf" -# page_index = i -# page = detect_figures_in_pdf(pdf_path, page_index, show=False) -# pages.append(Image.fromarray(page)) -# p1, p = pages[0], pages[1:] -# -# out_pdf_path = "/home/lillian/ocr_docs/out.pdf" -# -# p1.save( -# out_pdf_path, "PDF", resolution=150.0, save_all=True, append_images=p -# ) \ No newline at end of file diff --git a/cv_analysis/layout_parsing.py b/cv_analysis/layout_parsing.py index cdc061f..b6ae567 100644 --- a/cv_analysis/layout_parsing.py +++ b/cv_analysis/layout_parsing.py @@ -36,17 +36,17 @@ def parse_layout(image: np.array): if len(image_.shape) > 2: image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY) - vizlogger.debug(image_, "layout01_start.png") + #vizlogger.debug(image_, "layout01_start.png") image_ = cv2.GaussianBlur(image_, (7, 7), 0) - vizlogger.debug(image_, "layout02_blur.png") + #vizlogger.debug(image_, "layout02_blur.png") thresh = cv2.threshold(image_, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] vizlogger.debug(image_, "layout03_theshold.png") kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) - vizlogger.debug(kernel, "layout04_kernel.png") + #vizlogger.debug(kernel, "layout04_kernel.png") dilate = cv2.dilate(thresh, kernel, iterations=4) - vizlogger.debug(dilate, "layout05_dilate.png") + #vizlogger.debug(dilate, "layout05_dilate.png") rects = list(find_segments(dilate)) @@ -55,16 +55,16 @@ def parse_layout(image: np.array): x, y, w, h = rect cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1) cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7) - vizlogger.debug(image, "layout06_rectangles.png") + #vizlogger.debug(image, "layout06_rectangles.png") _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY) - vizlogger.debug(image, "layout07_threshold.png") + #vizlogger.debug(image, "layout07_threshold.png") image = ~image - vizlogger.debug(image, "layout08_inverse.png") + #vizlogger.debug(image, "layout08_inverse.png") if len(image.shape) > 2: image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - vizlogger.debug(image, "layout09_convertcolor.png") + #vizlogger.debug(image, "layout09_convertcolor.png") rects = find_segments(image) # <- End of meta detection @@ -87,8 +87,6 @@ def annotate_layout_in_pdf(pdf_path, page_index=1, return_rects=False, show=Fals elif show: page = draw_rectangles(page, rects) vizlogger.debug(page, "layout10_output.png") - - if show: show_mpl(page) else: page = draw_rectangles(page, rects) diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py index 114b41d..d2bd1c3 100644 --- a/cv_analysis/table_parsing.py +++ b/cv_analysis/table_parsing.py @@ -18,8 +18,7 @@ from cv_analysis.layout_parsing import parse_layout def add_external_contours(image, img): contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) - - contours = filter(partial(is_large_enough, min_area=5000000), contours) + contours = filter(partial(is_large_enough, min_area=5000), contours) for cnt in contours: x, y, w, h = cv2.boundingRect(cnt) @@ -52,7 +51,7 @@ def apply_motion_blur(image: np.array, angle, size=80): return blurred -def isolate_vertical_and_horizontal_components(img_bin, bounding_rects): +def isolate_vertical_and_horizontal_components(img_bin): """Identifies and reinforces horizontal and vertical lines in a binary image. Args: @@ -69,19 +68,19 @@ def isolate_vertical_and_horizontal_components(img_bin, bounding_rects): img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h) vizlogger.debug(img_bin_h, "tables01_isolate01_img_bin_h.png") img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v) - vizlogger.debug(img_bin_v, "tables02_isolate02_img_bin_v.png") + vizlogger.debug(img_bin_v | img_bin_h, "tables02_isolate02_img_bin_v.png") kernel_h = np.ones((1, 30), np.uint8) kernel_v = np.ones((30, 1), np.uint8) img_bin_h = cv2.dilate(img_bin_h, kernel_h, iterations=2) vizlogger.debug(img_bin_h, "tables03_isolate03_dilate_h.png") img_bin_v = cv2.dilate(img_bin_v, kernel_v, iterations=2) - vizlogger.debug(img_bin_v, "tables04_isolate04_dilate_v.png") + vizlogger.debug(img_bin_v | img_bin_h, "tables04_isolate04_dilate_v.png") img_bin_h = apply_motion_blur(img_bin_h, 0) vizlogger.debug(img_bin_h, "tables09_isolate05_blur_h.png") img_bin_v = apply_motion_blur(img_bin_v, 90) - vizlogger.debug(img_bin_v, "tables10_isolate06_blur_v.png") + vizlogger.debug(img_bin_v | img_bin_h, "tables10_isolate06_blur_v.png") img_bin_final = img_bin_h | img_bin_v vizlogger.debug(img_bin_final, "tables11_isolate07_final.png") @@ -91,9 +90,6 @@ def isolate_vertical_and_horizontal_components(img_bin, bounding_rects): img_bin_final = cv2.dilate(img_bin_final, np.ones((1, 1), np.uint8), iterations=1) vizlogger.debug(img_bin_final, "tables11_isolate13_dilate.png") - img_bin_final = disconnect_non_existing_cells(img_bin_final, bounding_rects) - vizlogger.debug(img_bin_final, "tables12_isolate14_disconnect.png") - return img_bin_final @@ -160,7 +156,12 @@ def parse_table(image: np.array, show=False): image = preprocess(image) table_layout_boxes = find_table_layout_boxes(image) - image = isolate_vertical_and_horizontal_components(image, table_layout_boxes) + + image = isolate_vertical_and_horizontal_components(image) + image = disconnect_non_existing_cells(image, table_layout_boxes) + vizlogger.debug(image, "tables12_isolate14_disconnect.png") + image = add_external_contours(image, image) + vizlogger.debug(image, "external_contours_added.png") _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S) @@ -181,9 +182,6 @@ def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False): stats = parse_table(page) page = draw_rectangles(page, stats, annotate=True) - - if show: - show_mpl(page) vizlogger.debug(page, "tables15_final_output.png") diff --git a/cv_analysis/utils/display.py b/cv_analysis/utils/display.py index 4d346a5..999c9a2 100644 --- a/cv_analysis/utils/display.py +++ b/cv_analysis/utils/display.py @@ -18,6 +18,7 @@ def save_mpl(image, path): ax.imshow(image, cmap="gray") # plt.close() plt.savefig(path) + plt.close() def show_cv2(image):