Isaac Riley beb40da3b1 Pull request #22: add single-cell filtering to table parsing and increase tolerance parameter to 7; refactor postprocessing to use the Rectangles data structure
Merge in RR/cv-analysis from remove_isolated to master

Squashed commit of the following:

commit 2613ed1615d1b69b3e4f2acea197993a91d00561
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date:   Tue Aug 2 10:17:33 2022 +0200

    add single-cell filtering to table parsing and increase tolerance parameter to 7; refactored postprocessing to use the Rectangles data structure
2022-08-02 10:54:13 +02:00

62 lines
2.0 KiB
Python

from typing import Iterable
import numpy as np
from cv_analysis.utils.structures import Rectangle
def find_max_overlap(box: Rectangle, box_list: Iterable[Rectangle]):
    """Find the candidate in ``box_list`` that overlaps ``box`` the most.

    Overlap is measured with ``box.iou(candidate)``. Each candidate is scored
    exactly once, so ``box_list`` may be a one-shot iterator.

    Args:
        box: The reference box; must provide an ``iou(other)`` method.
        box_list: Candidate boxes to compare against ``box``.

    Returns:
        A ``(best_candidate, iou)`` tuple, where ``iou`` is the value returned
        by ``box.iou(best_candidate)``. When several candidates share the
        highest score, the first one in iteration order is returned.

    Raises:
        ValueError: If ``box_list`` yields no candidates.
    """
    # Pair every candidate with its score up front so the winning IoU does not
    # have to be recomputed after the maximum has been selected.
    scored = ((candidate, box.iou(candidate)) for candidate in box_list)
    best = max(scored, key=lambda pair: pair[1], default=None)
    if best is None:
        raise ValueError("box_list must contain at least one candidate box")
    return best
def compute_page_iou(results_boxes: Iterable[Rectangle], ground_truth_boxes: Iterable[Rectangle]):
    """Score one page by greedily pairing ground-truth boxes with result boxes.

    Ground-truth boxes are processed from the last to the first. Each one is
    paired with the remaining result box that ``find_max_overlap`` reports as
    its best match, and that result box is then taken out of the pool so it
    cannot be matched twice.

    Args:
        results_boxes: Boxes produced for the page.
        ground_truth_boxes: Annotated boxes for the same page.

    Returns:
        The sum of the matched IoU values divided by the larger of the two box
        counts, or ``0`` when either input is empty.
    """
    predicted = list(results_boxes)
    expected = list(ground_truth_boxes)
    if not (predicted and expected):
        return 0

    # Dividing by the larger count means every box left unmatched on either
    # side lowers the score.
    n_slots = max(len(predicted), len(expected))

    total_iou = 0
    for gt_box in reversed(expected):
        if not predicted:
            # Every result box is already paired; remaining ground truth adds nothing.
            break
        match, overlap = find_max_overlap(gt_box, predicted)
        predicted.remove(match)
        total_iou += overlap

    return total_iou / n_slots
def compute_document_score(results_dict, annotation_dict):
    """Return the cell-count-weighted average of per-page IoU scores.

    Both arguments are read as ``{"pages": [{"cells": [...]}, ...]}``. Every
    cell entry is converted with ``Rectangle.from_dict_xywh`` and the pages are
    paired by index, so ``results_dict["pages"]`` needs an entry for every page
    in ``annotation_dict["pages"]``.

    Args:
        results_dict: Cells produced for each page of the document.
        annotation_dict: Annotated (ground-truth) cells for each page.

    Returns:
        The average of the ``compute_page_iou`` scores, with each page weighted
        by its share of the annotated cells. Returns ``0.0`` when the
        annotation contains no cells at all (no pages, or only pages with empty
        cell lists): the weights cannot be normalised in that case, and
        ``compute_page_iou`` likewise reports ``0`` for empty input.
    """
    annotated_pages = annotation_dict["pages"]
    cell_counts = np.array([len(page["cells"]) for page in annotated_pages])
    total_cells = cell_counts.sum()
    if total_cells == 0:
        # Without this guard the normalisation divides by zero and the result
        # is NaN or a ZeroDivisionError raised by np.average.
        return 0.0
    page_weights = cell_counts / total_cells

    result_pages = results_dict["pages"]
    scores = [
        compute_page_iou(
            map(Rectangle.from_dict_xywh, result_pages[index]["cells"]),
            map(Rectangle.from_dict_xywh, page["cells"]),
        )
        for index, page in enumerate(annotated_pages)
    ]
    return np.average(np.array(scores), weights=page_weights)
"""
from cv_analysis.utils.test_metrics import *
r1 = Rectangle.from_dict_xywh({'x': 30, 'y': 40, 'width': 50, 'height': 60})
r2 = Rectangle.from_dict_xywh({'x': 40, 'y': 30, 'width': 55, 'height': 65})
r3 = Rectangle.from_dict_xywh({'x': 45, 'y': 35, 'width': 45, 'height': 55})
r4 = Rectangle.from_dict_xywh({'x': 25, 'y': 45, 'width': 45, 'height': 55})
d1 = {"pages": [{"cells": [r1.json_xywh(), r2.json_xywh()]}]}
d2 = {"pages": [{"cells": [r3.json_xywh(), r4.json_xywh()]}]}
r1.iou(r2)
find_max_overlap(r1, [r2, r3, r4])
compute_page_iou([r1, r2], [r3, r4])
compute_document_score(d1, d2)
"""