Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e3f06da823 | ||
|
|
c25c8d764e | ||
|
|
dcab1e8616 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -25,3 +25,4 @@ build_venv/
|
|||||||
/data/metadata_testing_files.csv
|
/data/metadata_testing_files.csv
|
||||||
.coverage
|
.coverage
|
||||||
/data/
|
/data/
|
||||||
|
/venv/
|
||||||
@ -1,4 +1,3 @@
|
|||||||
import itertools
|
|
||||||
from itertools import compress
|
from itertools import compress
|
||||||
from itertools import starmap
|
from itertools import starmap
|
||||||
from operator import __and__
|
from operator import __and__
|
||||||
@ -6,17 +5,15 @@ from operator import __and__
|
|||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
from cv_analysis.utils.connect_rects import connect_related_rects2
|
from cv_analysis.utils.connect_rects import connect_related_rects2
|
||||||
from cv_analysis.utils.structures import Rectangle
|
|
||||||
from cv_analysis.utils.postprocessing import (
|
from cv_analysis.utils.postprocessing import (
|
||||||
remove_overlapping,
|
|
||||||
remove_included,
|
remove_included,
|
||||||
has_no_parent,
|
has_no_parent,
|
||||||
)
|
)
|
||||||
from cv_analysis.utils.visual_logging import vizlogger
|
from cv_analysis.utils.structures import Rectangle
|
||||||
|
|
||||||
#could be dynamic parameter is the scan is noisy
|
|
||||||
|
# could be dynamic parameter is the scan is noisy
|
||||||
def is_likely_segment(rect, min_area=100):
|
def is_likely_segment(rect, min_area=100):
|
||||||
return cv2.contourArea(rect, False) > min_area
|
return cv2.contourArea(rect, False) > min_area
|
||||||
|
|
||||||
@ -34,7 +31,7 @@ def find_segments(image):
|
|||||||
|
|
||||||
|
|
||||||
def dilate_page_components(image):
|
def dilate_page_components(image):
|
||||||
#if text is detected in words make kernel bigger
|
# if text is detected in words make kernel bigger
|
||||||
image = cv2.GaussianBlur(image, (7, 7), 0)
|
image = cv2.GaussianBlur(image, (7, 7), 0)
|
||||||
thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
|
thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
|
||||||
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
||||||
@ -49,7 +46,6 @@ def fill_in_component_area(image, rect):
|
|||||||
return ~image
|
return ~image
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_layout(image: np.array):
|
def parse_layout(image: np.array):
|
||||||
image = image.copy()
|
image = image.copy()
|
||||||
image_ = image.copy()
|
image_ = image.copy()
|
||||||
|
|||||||
@ -2,7 +2,8 @@ from functools import partial
|
|||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from iteration_utilities import starfilter, first
|
from iteration_utilities import first
|
||||||
|
from iteration_utilities._iteration_utilities import starfilter
|
||||||
|
|
||||||
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
|
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
|
||||||
from cv_analysis.utils.visual_logging import vizlogger
|
from cv_analysis.utils.visual_logging import vizlogger
|
||||||
|
|||||||
@ -1,7 +1,3 @@
|
|||||||
from functools import partial
|
|
||||||
from itertools import chain, starmap
|
|
||||||
from operator import attrgetter
|
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from funcy import lmap, lfilter
|
from funcy import lmap, lfilter
|
||||||
@ -130,10 +126,10 @@ def parse_tables(image: np.array, show=False):
|
|||||||
image = preprocess(image)
|
image = preprocess(image)
|
||||||
image = isolate_vertical_and_horizontal_components(image)
|
image = isolate_vertical_and_horizontal_components(image)
|
||||||
rects = turn_connected_components_into_rects(image)
|
rects = turn_connected_components_into_rects(image)
|
||||||
#print(rects, "\n\n")
|
# print(rects, "\n\n")
|
||||||
rects = list(map(Rectangle.from_xywh, rects))
|
rects = list(map(Rectangle.from_xywh, rects))
|
||||||
#print(rects, "\n\n")
|
# print(rects, "\n\n")
|
||||||
rects = remove_isolated(rects)
|
rects = remove_isolated(rects)
|
||||||
#print(rects, "\n\n")
|
# print(rects, "\n\n")
|
||||||
|
|
||||||
return rects
|
return rects
|
||||||
|
|||||||
@ -6,10 +6,14 @@ def is_near_enough(rect_pair, max_gap=14):
|
|||||||
x1, y1, w1, h1 = rect_pair[0]
|
x1, y1, w1, h1 = rect_pair[0]
|
||||||
x2, y2, w2, h2 = rect_pair[1]
|
x2, y2, w2, h2 = rect_pair[1]
|
||||||
|
|
||||||
return any([abs(x1 - (x2 + w2)) <= max_gap,
|
return any(
|
||||||
abs(x2 - (x1 + w1)) <= max_gap,
|
[
|
||||||
abs(y2 - (y1 + h1)) <= max_gap,
|
abs(x1 - (x2 + w2)) <= max_gap,
|
||||||
abs(y1 - (y2 + h2)) <= max_gap])
|
abs(x2 - (x1 + w1)) <= max_gap,
|
||||||
|
abs(y2 - (y1 + h1)) <= max_gap,
|
||||||
|
abs(y1 - (y2 + h2)) <= max_gap,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def is_overlapping(rect_pair):
|
def is_overlapping(rect_pair):
|
||||||
@ -23,28 +27,36 @@ def is_overlapping(rect_pair):
|
|||||||
def is_on_same_line(rect_pair):
|
def is_on_same_line(rect_pair):
|
||||||
x1, y1, w1, h1 = rect_pair[0]
|
x1, y1, w1, h1 = rect_pair[0]
|
||||||
x2, y2, w2, h2 = rect_pair[1]
|
x2, y2, w2, h2 = rect_pair[1]
|
||||||
return any([any([abs(y1 - y2) <= 10,
|
return any(
|
||||||
abs(y1 + h1 - (y2 + h2)) <= 10]),
|
[
|
||||||
any([y2 <= y1 and y1 + h1 <= y2 + h2,
|
any([abs(y1 - y2) <= 10, abs(y1 + h1 - (y2 + h2)) <= 10]),
|
||||||
y1 <= y2 and y2 + h2 <= y1 + h1])])
|
any([y2 <= y1 and y1 + h1 <= y2 + h2, y1 <= y2 and y2 + h2 <= y1 + h1]),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def has_correct_position1(rect_pair):
|
def has_correct_position1(rect_pair):
|
||||||
x1, y1, w1, h1 = rect_pair[0]
|
x1, y1, w1, h1 = rect_pair[0]
|
||||||
x2, y2, w2, h2 = rect_pair[1]
|
x2, y2, w2, h2 = rect_pair[1]
|
||||||
return any([any([abs(x1 - x2) <= 10,
|
return any(
|
||||||
abs(y1 - y2) <= 10,
|
[
|
||||||
abs(x1 + w1 - (x2 + w2)) <= 10,
|
any(
|
||||||
abs(y1 + h1 - (y2 + h2)) <= 10]),
|
[abs(x1 - x2) <= 10, abs(y1 - y2) <= 10, abs(x1 + w1 - (x2 + w2)) <= 10, abs(y1 + h1 - (y2 + h2)) <= 10]
|
||||||
any([y2 <= y1 and y1 + h1 <= y2 + h2,
|
),
|
||||||
y1 <= y2 and y2 + h2 <= y1 + h1,
|
any(
|
||||||
x2 <= x1 and x1 + w1 <= x2 + w2,
|
[
|
||||||
x1 <= x2 and x2 + w2 <= x1 + w1])])
|
y2 <= y1 and y1 + h1 <= y2 + h2,
|
||||||
|
y1 <= y2 and y2 + h2 <= y1 + h1,
|
||||||
|
x2 <= x1 and x1 + w1 <= x2 + w2,
|
||||||
|
x1 <= x2 and x2 + w2 <= x1 + w1,
|
||||||
|
]
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def is_related(rect_pair):
|
def is_related(rect_pair):
|
||||||
return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping(
|
return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping(rect_pair)
|
||||||
rect_pair)
|
|
||||||
|
|
||||||
|
|
||||||
def fuse_rects(rect1, rect2):
|
def fuse_rects(rect1, rect2):
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
from collections import namedtuple
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from itertools import starmap, compress
|
from itertools import starmap, compress
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
from cv_analysis.utils.structures import Rectangle
|
from cv_analysis.utils.structures import Rectangle
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -46,5 +46,6 @@ if __name__ == "__main__":
|
|||||||
from cv_analysis.layout_parsing import parse_layout as analyze
|
from cv_analysis.layout_parsing import parse_layout as analyze
|
||||||
elif args.type == "figure":
|
elif args.type == "figure":
|
||||||
from cv_analysis.figure_detection.figure_detection import detect_figures
|
from cv_analysis.figure_detection.figure_detection import detect_figures
|
||||||
|
|
||||||
analyze = detect_figures
|
analyze = detect_figures
|
||||||
annotate_page(page, analyze, draw, name=name, show=args.show)
|
annotate_page(page, analyze, draw, name=name, show=args.show)
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import timeit
|
import timeit
|
||||||
from time import process_time
|
|
||||||
from itertools import starmap
|
from itertools import starmap
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@ -57,7 +56,6 @@ if __name__ == "__main__":
|
|||||||
t2 = timeit.default_timer()
|
t2 = timeit.default_timer()
|
||||||
save_as_pdf(annotated_pages, args.output_folder, Path(args.pdf_path).stem, args.type)
|
save_as_pdf(annotated_pages, args.output_folder, Path(args.pdf_path).stem, args.type)
|
||||||
t3 = timeit.default_timer()
|
t3 = timeit.default_timer()
|
||||||
print("[s] opening file and convert pdf pages to images: ", t1-t0)
|
print("[s] opening file and convert pdf pages to images: ", t1 - t0)
|
||||||
print("[s] analyse and annotate images: ", t2-t1)
|
print("[s] analyse and annotate images: ", t2 - t1)
|
||||||
print("[s] save images as pdf: ", t3-t2)
|
print("[s] save images as pdf: ", t3 - t2)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user