from pathlib import Path

from image_prediction.config import CONFIG
from image_prediction.pipeline import load_pipeline


def test_all_duplicate_images_are_filtered():
    """Regression test for RED-10765 (RM-241): removed redactions reappear.

    Runs the prediction pipeline over the shortened sample PDF and checks, in
    this order, that:

    * no two predictions share the same (x1, x2, y1, y2, pageNumber) tuple,
    * exactly one prediction has its ``filters.allPassed`` flag set,
    * the pipeline yields 177 predictions in total.
    """
    # Fixture lives in the "data" directory one level above this test's folder.
    fixture_dir = Path(__file__).parents[1] / "data" / "RED-10765"
    pdf_path = (
        fixture_dir
        / "RM-241-461c90d6d6dc0416ad5f0b05feef4dfc.UNTOUCHED_shortened.pdf"
    )
    pdf_bytes = pdf_path.read_bytes()

    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    # Materialize the results once: they are iterated twice and counted below.
    predictions = list(pipeline(pdf_bytes))

    # A prediction is identified by its bounding box plus the page it is on.
    key_fields = ("x1", "x2", "y1", "y2", "pageNumber")
    seen = set()
    for prediction in predictions:
        position = prediction["position"]
        key = tuple(position[field] for field in key_fields)
        assert key not in seen, f"Duplicate found: {key}"
        seen.add(key)

    passing = [
        prediction
        for prediction in predictions
        if prediction["filters"]["allPassed"]
    ]
    all_passed = len(passing)
    assert all_passed == 1, f"Expected 1 image with allPassed flag set to True, but got {all_passed}"

    assert len(predictions) == 177, f"Expected 177 images, but got {len(predictions)}"