21 lines
865 B
Python
21 lines
865 B
Python
from pathlib import Path
|
|
|
|
from image_prediction.config import CONFIG
|
|
from image_prediction.pipeline import load_pipeline
|
|
|
|
|
|
def test_all_duplicate_images_are_filtered():
    """Regression test for RED-10765 (RM-241): removed redactions reappear.

    Feeds the shortened sample PDF through the prediction pipeline and checks
    that no two predictions share the same representation, bounding
    coordinates and page number.
    """
    document = (
        Path(__file__).parents[1]
        / "data"
        / "RED-10765"
        / "RM-241-461c90d6d6dc0416ad5f0b05feef4dfc.UNTOUCHED_shortened.pdf"
    ).read_bytes()

    predict = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
    # Materialize every prediction before checking, so the pipeline runs to
    # completion even when a duplicate is present.
    results = list(predict(document))

    already_seen = set()
    for result in results:
        representation = result["representation"]
        position = result["position"]
        # Identity of a prediction: its representation plus where it sits in the document.
        key = (
            representation,
            position["x1"],
            position["x2"],
            position["y1"],
            position["y2"],
            position["pageNumber"],
        )
        assert key not in already_seen, f"Duplicate found: {key}"
        already_seen.add(key)
|
|
|