diff --git a/src/image_prediction/pipeline.py b/src/image_prediction/pipeline.py index 8219a9d..8bf0ff0 100644 --- a/src/image_prediction/pipeline.py +++ b/src/image_prediction/pipeline.py @@ -79,6 +79,12 @@ class Pipeline: def filter_duplicates(metadata: Iterable[dict[str, Any]]) -> Iterable[dict[str, Any]]: """Filter out duplicate images from the `position` (image coordinates), `page` and `representation` (perceptual hash). See RED-10765 (RM-241): Removed redactions reappear for why this is necessary. + + Args: + metadata: Iterable of image metadata dicts. + + Returns: + Iterable of image metadata dicts with duplicates removed. """ seen = set() for item in metadata: