Merge in RR/cv-analysis from remove_isolated to master
Squashed commit of the following:
commit 2613ed1615d1b69b3e4f2acea197993a91d00561
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Aug 2 10:17:33 2022 +0200
add single-cell filtering to table parsing and increase tolerance parameter to 7; refactored postprocessing to use the Rectangles data structure
62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
from typing import Iterable
|
|
import numpy as np
|
|
from cv_analysis.utils.structures import Rectangle
|
|
|
|
|
|
def find_max_overlap(box: Rectangle, box_list: Iterable[Rectangle]):
    """Return the candidate with the highest IoU against ``box``.

    Args:
        box: Reference rectangle; its ``iou`` method is used for scoring.
        box_list: Candidate rectangles to compare against ``box``.

    Returns:
        A ``(best_candidate, iou)`` tuple. On ties the earliest candidate wins.

    Raises:
        ValueError: If ``box_list`` yields no candidates.
    """
    # Pair each candidate with its score so the winner's IoU comes for free.
    scored = ((candidate, box.iou(candidate)) for candidate in box_list)
    winner, overlap = max(scored, key=lambda pair: pair[1])
    return winner, overlap
|
|
|
|
|
|
def compute_page_iou(results_boxes: Iterable[Rectangle], ground_truth_boxes: Iterable[Rectangle]):
    """Score one page by greedily pairing ground-truth boxes with result boxes.

    Ground-truth boxes are processed from last to first. Each one claims the
    remaining result box with the highest IoU, and that result box is then
    taken out of the pool so it cannot be matched twice.

    Args:
        results_boxes: Predicted rectangles for the page.
        ground_truth_boxes: Annotated rectangles for the page.

    Returns:
        The sum of matched IoUs divided by the larger of the two box counts,
        so surplus boxes on either side pull the score down. Returns ``0``
        when either side has no boxes.
    """
    unmatched = list(results_boxes)
    pending = list(ground_truth_boxes)
    if not (unmatched and pending):
        return 0

    # Fixed up front: the pool of unmatched results shrinks during matching.
    normaliser = max(len(unmatched), len(pending))

    total_iou = 0
    for gt_box in reversed(pending):
        if not unmatched:
            # More ground-truth boxes than results; the rest stay unmatched.
            break
        match, overlap = find_max_overlap(gt_box, unmatched)
        unmatched.remove(match)
        total_iou += overlap

    return total_iou / normaliser
|
|
|
|
|
|
def compute_document_score(results_dict, annotation_dict):
    """Score a document as the cell-count-weighted mean of its page scores.

    Each annotated page is scored with ``compute_page_iou`` against the page
    at the same index in ``results_dict``. Pages are weighted by their share
    of the annotated cells.

    Args:
        results_dict: Predictions shaped ``{"pages": [{"cells": [...]}, ...]}``;
            every cell is converted with ``Rectangle.from_dict_xywh``.
        annotation_dict: Ground truth with the same structure. Its pages
            determine which page indices are scored and how they are weighted.

    Returns:
        The weighted average of the per-page scores. Returns ``0.0`` when the
        annotation contains no cells at all (no pages, or only empty pages),
        mirroring ``compute_page_iou``'s result for empty input. Previously
        this case raised ``ZeroDivisionError`` or produced NaN.
    """
    annotated_pages = annotation_dict["pages"]
    cell_counts = np.array([len(page["cells"]) for page in annotated_pages], dtype=float)

    total_cells = cell_counts.sum()
    if total_cells == 0:
        # No annotated cells: the weights would be 0/0, so there is nothing to average.
        return 0.0
    page_weights = cell_counts / total_cells

    result_pages = results_dict["pages"]
    scores = [
        compute_page_iou(
            map(Rectangle.from_dict_xywh, result_pages[i]["cells"]),
            map(Rectangle.from_dict_xywh, page["cells"]),
        )
        for i, page in enumerate(annotated_pages)
    ]

    return np.average(np.array(scores), weights=page_weights)
|
|
|
|
|
|
"""
|
|
from cv_analysis.utils.test_metrics import *
|
|
|
|
r1 = Rectangle.from_dict_xywh({'x': 30, 'y': 40, 'width': 50, 'height': 60})
|
|
r2 = Rectangle.from_dict_xywh({'x': 40, 'y': 30, 'width': 55, 'height': 65})
|
|
r3 = Rectangle.from_dict_xywh({'x': 45, 'y': 35, 'width': 45, 'height': 55})
|
|
r4 = Rectangle.from_dict_xywh({'x': 25, 'y': 45, 'width': 45, 'height': 55})
|
|
d1 = {"pages": [{"cells": [r1.json_xywh(), r2.json_xywh()]}]}
|
|
d2 = {"pages": [{"cells": [r3.json_xywh(), r4.json_xywh()]}]}
|
|
|
|
r1.iou(r2)
|
|
find_max_overlap(r1, [r2, r3, r4])
|
|
compute_page_iou([r1, r2], [r3, r4])
|
|
compute_document_score(d1, d2)
|
|
"""
|