21 lines
865 B
Python

from pathlib import Path
from image_prediction.config import CONFIG
from image_prediction.pipeline import load_pipeline
def test_all_duplicate_images_are_filtered():
    """Regression test for RED-10765 (RM-241): Removed redactions reappear.

    Runs the prediction pipeline over the RED-10765 fixture PDF and fails on
    the first prediction whose representation, position coordinates
    (x1, x2, y1, y2) and page number all match those of an earlier prediction.
    """
    fixture_dir = Path(__file__).parents[1] / "data" / "RED-10765"
    fixture_file = fixture_dir / "RM-241-461c90d6d6dc0416ad5f0b05feef4dfc.UNTOUCHED_shortened.pdf"
    pdf_bytes = fixture_file.read_bytes()

    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    # Collect every prediction up front, before any uniqueness check runs.
    predictions = list(pipeline(pdf_bytes))

    # Fields of prediction["position"] that, together with the representation,
    # identify a prediction; the order fixes the layout of the reported key.
    position_fields = ("x1", "x2", "y1", "y2", "pageNumber")

    seen = set()
    for prediction in predictions:
        representation = prediction["representation"]
        position = prediction["position"]
        key = (representation, *(position[field] for field in position_fields))
        assert key not in seen, f"Duplicate found: {key}"
        seen.add(key)