From 2c39ffbcdda633da8fbc60681f851a5c817e70b5 Mon Sep 17 00:00:00 2001 From: llocarnini Date: Wed, 27 Apr 2022 11:12:23 +0200 Subject: [PATCH] changed kernel and iteration for better text removal --- cv_analysis/fig_detection_with_layout.py | 12 +++++++----- cv_analysis/figure_detection.py | 16 +++++++++++++++- cv_analysis/utils/text.py | 7 ++++--- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/cv_analysis/fig_detection_with_layout.py b/cv_analysis/fig_detection_with_layout.py index a9226f9..7f16244 100644 --- a/cv_analysis/fig_detection_with_layout.py +++ b/cv_analysis/fig_detection_with_layout.py @@ -1,11 +1,10 @@ from cv_analysis.layout_parsing import annotate_layout_in_pdf from cv_analysis.figure_detection import detect_figures from cv_analysis.table_parsing import tables_in_image, parse_table -from cv_analysis.utils.text import find_primary_text_regions, remove_primary_text_regions from cv_analysis.utils.draw import draw_rectangles from cv_analysis.utils.display import show_mpl from cv_analysis.utils.visual_logging import vizlogger -from PIL import Image +#from PIL import Image @@ -28,9 +27,12 @@ def parse_content_structures(page, large_rects, small_rects): figure_rects = detect_figures(cropped_image) if len(figure_rects) == 0: # text page = draw_rectangles(page, [coordinates], color=(0, 255, 0), annotate=True) - elif tables_in_image(cropped_image)[0]: # table - stats = parse_table(page) - page = draw_rectangles(page, stats, color=(255, 0, 0), annotate=True) + elif len(parse_table(cropped_image)) > 0: + #elif tables_in_image(cropped_image)[0]: # table + stats = parse_table(cropped_image) + cropped_image = draw_rectangles(cropped_image, stats, color=(255, 0, 0), annotate=True) + x,y,w,h = coordinates + page[y:y+h, x:x+w] = cropped_image else: # figure page = draw_rectangles(page, [coordinates], color=(0, 0, 255), annotate=True) diff --git a/cv_analysis/figure_detection.py b/cv_analysis/figure_detection.py index e1a67b8..f7fdf2b 100644 --- a/cv_analysis/figure_detection.py +++ b/cv_analysis/figure_detection.py @@ -9,7 +9,7 @@ from cv_analysis.utils.post_processing import remove_included from cv_analysis.utils.filters import is_large_enough, has_acceptable_format from cv_analysis.utils.text import remove_primary_text_regions from cv_analysis.utils.visual_logging import vizlogger - +#from PIL import Image def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6): return is_large_enough(cont, min_area) and has_acceptable_format(cont, max_width_to_hight_ratio) @@ -41,4 +41,18 @@ def detect_figures_in_pdf(pdf_path, page_index=1, show=False): vizlogger.debug(page, "figures03_final.png") if show: show_mpl(page) + return page +# pages = [] +# for i in range(0,16): +# pdf_path = "/home/lillian/ocr_docs/Report on spectra.pdf" +# page_index = i +# page = detect_figures_in_pdf(pdf_path,page_index) +# pages.append(Image.fromarray(page)) +# p1, p = pages[0], pages[1:] +# +# out_pdf_path = "/home/lillian/ocr_docs/out.pdf" +# +# p1.save( +# out_pdf_path, "PDF", resolution=150.0, save_all=True, append_images=p +# ) \ No newline at end of file diff --git a/cv_analysis/utils/text.py b/cv_analysis/utils/text.py index 6161db2..01f6c4b 100644 --- a/cv_analysis/utils/text.py +++ b/cv_analysis/utils/text.py @@ -19,6 +19,7 @@ def remove_primary_text_regions(image): for cnt in cnts: x, y, w, h = cv2.boundingRect(cnt) cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1) + #show_mpl(image) return image @@ -46,12 +47,12 @@ def find_primary_text_regions(image): image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] - close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 7)) #20,3 - close = cv2.morphologyEx(image, cv2.MORPH_CLOSE, close_kernel, iterations=1) + close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 3)) #20,3 + close = cv2.morphologyEx(image, cv2.MORPH_CLOSE, close_kernel, iterations=2) #show_mpl(close) - dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(7, 4)) #5,3 + dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(7, 3)) #5,3 dilate = cv2.dilate(close, dilate_kernel, iterations=1) #show_mpl(dilate)