36 lines
1.2 KiB
Python
36 lines
1.2 KiB
Python
from pathlib import Path
|
|
|
|
from image_prediction.config import CONFIG
|
|
from image_prediction.pipeline import load_pipeline
|
|
|
|
|
|
def test_all_duplicate_images_are_filtered():
    """Regression test for RED-10765 (RM-241): removed redactions reappear.

    Runs the pipeline on the shortened fixture PDF and checks, in order, that

    * no two predictions share the same position coordinates and page number,
    * exactly one prediction has its ``allPassed`` filter flag set, and
    * the pipeline yields 177 predictions in total.
    """
    # The fixture lives in ``data/RED-10765`` one directory above this file's folder.
    fixture = Path(__file__).parents[1].joinpath(
        "data",
        "RED-10765",
        "RM-241-461c90d6d6dc0416ad5f0b05feef4dfc.UNTOUCHED_shortened.pdf",
    )
    raw_pdf = fixture.read_bytes()

    run_pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    predictions = list(run_pipeline(raw_pdf))

    # A prediction is identified by its bounding-box coordinates plus its page.
    position_fields = ("x1", "x2", "y1", "y2", "pageNumber")
    visited = set()
    for prediction in predictions:
        position = prediction["position"]
        key = tuple(position[field] for field in position_fields)
        assert key not in visited, f"Duplicate found: {key}"
        visited.add(key)

    passing = [item for item in predictions if item["filters"]["allPassed"]]
    all_passed = len(passing)
    assert all_passed == 1, f"Expected 1 image with allPassed flag set to True, but got {all_passed}"

    total = len(predictions)
    assert total == 177, f"Expected 177 images, but got {total}"
|