Julius Unverfehrt ce9e92876c Pull request #16: Add table parsing fixtures
Merge in RR/cv-analysis from add_table_parsing_fixtures to master

Squashed commit of the following:

commit cfc89b421b61082c8e92e1971c9d0bf4490fa07e
Merge: a7ecb05 73c66a8
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 12:19:01 2022 +0200

    Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into add_table_parsing_fixtures

commit a7ecb05b7d8327f0c7429180f63a380b61b06bc3
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 12:02:07 2022 +0200

    refactor

commit 466f217e5a9ee5c54fd38c6acd28d54fc38ff9bb
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Mon Jul 11 10:24:14 2022 +0200

    deleted unused imports and unused lines of code

commit c58955c8658d0631cdd1c24c8556d399e3fd9990
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Mon Jul 11 10:16:01 2022 +0200

    black reformatted files

commit f8bcb10a00ff7f0da49b80c1609b17997411985a
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Jul 5 15:15:00 2022 +0200

    reformat files

commit 432e8a569fd70bd0745ce0549c2bfd2f2e907763
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Jul 5 15:08:22 2022 +0200

    Added a better test for generic pages with tables (WIP: thicker lines still produce inconsistent results).
    Added a test for patchy tables, which does not pass yet.

commit 2aac9ebf5c76bd963f8c136fe5dd4c2d7681b469
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Mon Jul 4 16:56:29 2022 +0200

    added new fixtures for table_parsing_test.py

commit 37606cac0301b13e99be2c16d95867477f29e7c4
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Fri Jul 1 16:02:44 2022 +0200

    Added a separate file for table parsing fixtures, including fixtures for generic tables. Tests for the generic table fixtures are still WIP.
2022-07-11 12:25:16 +02:00

88 lines
2.5 KiB
Python

import numpy as np
from cv_analysis.utils.structures import Rectangle
def xyxy_from_object(box_object):
    """Return the corner coordinates ``(x1, y1, x2, y2)`` of a box.

    Three representations are tried in order:

    1. an object with an ``xyxy()`` method whose result unpacks into four values,
    2. a mapping with the keys ``"x"``, ``"y"``, ``"width"`` and ``"height"``
       (the second corner is computed as ``x + width``, ``y + height``),
    3. any iterable of exactly four values, which is returned as given.

    Only the exceptions that indicate "this is not that representation" trigger
    the next attempt; anything else (e.g. ``KeyboardInterrupt``) propagates.

    Raises:
        TypeError: if no representation matches and ``box_object`` is not iterable.
        ValueError: if the final fallback does not unpack into exactly four values.
    """
    try:
        x1, y1, x2, y2 = box_object.xyxy()
    except (AttributeError, TypeError):
        # No usable ``xyxy`` method: try the mapping form next.
        try:
            x1 = box_object["x"]
            y1 = box_object["y"]
            x2 = x1 + box_object["width"]
            y2 = y1 + box_object["height"]
        except (TypeError, KeyError, IndexError):
            # Not subscriptable by these keys: treat it as a plain 4-sequence.
            x1, y1, x2, y2 = box_object
    return x1, y1, x2, y2
def xywh_from_object(box_object):
    """Return a box as ``(x, y, width, height)``.

    Three representations are tried in order:

    1. an object with an ``xywh()`` method whose result unpacks into four values,
    2. a mapping with the keys ``"x"``, ``"y"``, ``"width"`` and ``"height"``,
    3. any iterable of exactly four values, which is returned as given.

    Only the exceptions that indicate "this is not that representation" trigger
    the next attempt; anything else (e.g. ``KeyboardInterrupt``) propagates.

    Raises:
        TypeError: if no representation matches and ``box_object`` is not iterable.
        ValueError: if the final fallback does not unpack into exactly four values.
    """
    try:
        x, y, w, h = box_object.xywh()
    except (AttributeError, TypeError):
        # No usable ``xywh`` method: try the mapping form next.
        try:
            x = box_object["x"]
            y = box_object["y"]
            w = box_object["width"]
            h = box_object["height"]
        except (TypeError, KeyError, IndexError):
            # Not subscriptable by these keys: treat it as a plain 4-sequence.
            x, y, w, h = box_object
    return x, y, w, h
def compute_iou_from_boxes(box1: Rectangle, box2: list):
    """Compute the intersection over union (IoU) of two axis-aligned boxes.

    Both arguments are converted with ``xywh_from_object``, so each box is read
    as ``(x, y, width, height)`` regardless of how it is represented.

    Returns:
        ``intersection / union``, or ``0.0`` when the boxes do not overlap or
        when the union has zero area (e.g. two coincident zero-sized boxes),
        which would otherwise raise ``ZeroDivisionError``.
    """
    ax1, ay1, aw, ah = xywh_from_object(box1)
    bx1, by1, bw, bh = xywh_from_object(box2)
    ax2, ay2 = ax1 + aw, ay1 + ah
    bx2, by2 = bx1 + bw, by1 + bh

    # Disjoint along either axis means there is no intersection at all.
    if (ax1 > bx2) or (bx1 > ax2) or (ay1 > by2) or (by1 > ay2):
        return 0.0

    intersection = (min(ax2, bx2) - max(ax1, bx1)) * (min(ay2, by2) - max(ay1, by1))
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - intersection

    # Degenerate boxes can touch without covering any area; avoid dividing by zero.
    if union == 0:
        return 0.0
    return intersection / union
def find_max_overlap(box, box_list):
    """Find the entry of ``box_list`` with the highest IoU against ``box``.

    Each candidate is scored exactly once with ``compute_iou_from_boxes``.

    Returns:
        A ``(best_candidate, iou)`` tuple. If several candidates share the
        highest IoU, the first one in ``box_list`` is returned.

    Raises:
        ValueError: if ``box_list`` is empty (raised by ``max``).
    """
    scored_candidates = (
        (candidate, compute_iou_from_boxes(box, candidate)) for candidate in box_list
    )
    # max() keeps the first of several equally good items, so ties resolve in list order.
    return max(scored_candidates, key=lambda scored: scored[1])
def compute_page_iou(results_box_list, gt_box_list):
    """Score the predicted boxes of one page against its ground-truth boxes.

    Matching is greedy: ground-truth boxes are taken from the end of their
    list, each is paired with the remaining result box of highest IoU (via
    ``find_max_overlap``), and that result box is then removed from the pool.
    The summed IoU of all pairs is divided by the larger of the two list
    lengths, so unmatched boxes on either side lower the score.

    Both inputs are shallow-copied first and are not mutated.

    Returns:
        ``0`` if either list is empty, otherwise the normalised IoU sum.
    """
    unmatched_results = results_box_list.copy()
    pending_gt = gt_box_list.copy()
    if not (unmatched_results and pending_gt):
        return 0

    # Fixed before matching starts: both working lists shrink inside the loop.
    normaliser = max(len(unmatched_results), len(pending_gt))

    pair_ious = []
    while pending_gt and unmatched_results:
        matched_box, matched_iou = find_max_overlap(pending_gt.pop(), unmatched_results)
        unmatched_results.remove(matched_box)
        pair_ious.append(matched_iou)

    return sum(pair_ious) / normaliser
def compute_document_score(results_dict, annotation_dict):
    """Compute a document-level score as a weighted average of page scores.

    Both dictionaries are read as ``d["pages"][i]["cells"]``. Page ``i`` is
    scored with ``compute_page_iou`` on the result cells and annotated cells
    of that page. Each page is weighted by its share of all annotated cells in
    the document.

    Returns:
        The weighted average of the per-page scores, as computed by
        ``numpy.average``.
    """
    gt_pages = annotation_dict["pages"]

    # Relative weight of a page = its annotated cell count / total annotated cells.
    cell_counts = np.array([len(gt_page["cells"]) for gt_page in gt_pages])
    relative_weights = cell_counts / sum(cell_counts)

    page_scores = np.array(
        [
            compute_page_iou(results_dict["pages"][page_index]["cells"], gt_page["cells"])
            for page_index, gt_page in enumerate(gt_pages)
        ]
    )
    return np.average(page_scores, weights=relative_weights)