chore: add tests to ensure no regression happens ever again

2024-08-30 10:55:50 +02:00 · 2024-08-30 10:55:50 +02:00 · b3a58d6777
commit b3a58d6777
parent c888453cc6
3 changed files with 39 additions and 16 deletions
--- a/test/regressions_tests/image_classification_test.py
+++ b/test/regressions_tests/image_classification_test.py
@ -0,0 +1,21 @@
+from pathlib import Path
+
+from funcy import first
+
+from image_prediction.config import CONFIG
+from image_prediction.pipeline import load_pipeline
+
+
+def test_image_classification_does_not_regress():
+    """See RED-9948: the predictions unexpectedly changed for some images. In the end the issue is the tensorflow
+    version. We ensure that the prediction of the image with the hash FA30F080F0C031CE17E8CF237 is inconclusive,
+    and that the flag all_passed is false."""
+    pdf_path = Path(__file__).parents[1] / "data" / "RED-9948" / "SYNGENTA_EFSA_sanitisation_GFL_v2.pdf"
+    pdf_bytes = pdf_path.read_bytes()
+
+    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
+    predictions = list(pipeline(pdf_bytes))
+    predictions = first([x for x in predictions if x["representation"] == "FA30F080F0C031CE17E8CF237"])
+
+    assert predictions["filters"]["allPassed"] is False
+    assert predictions["filters"]["probability"]["unconfident"] is True
--- a/test/regressions_tests/image_hashing_test.py
+++ b/test/regressions_tests/image_hashing_test.py
@ -0,0 +1,18 @@
+from pathlib import Path
+
+from image_prediction.encoder.encoders.hash_encoder import HashEncoder
+from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
+
+
+def test_all_hashes_have_length_of_twentyfive():
+    """See RED-3814: all hashes should have 25 characters."""
+    pdf_path = Path(__file__).parents[1] / "data" / "RED-3814" / "similarImages2.pdf"
+    pdf_bytes = pdf_path.read_bytes()
+    image_extractor = ParsablePDFImageExtractor()
+    image_metadata_pairs = list(image_extractor.extract(pdf_bytes))
+    images = [image for image, _ in image_metadata_pairs]
+
+    hash_encoder = HashEncoder()
+    hashes = list(hash_encoder.encode(images))
+
+    assert all(len(h) == 25 for h in hashes)
--- a/test/unit_tests/encoder_test.py
+++ b/test/unit_tests/encoder_test.py
@ -1,7 +1,6 @@
 import random
 from itertools import starmap
 from operator import __eq__
-from pathlib import Path

 import pytest
 from PIL.Image import Image
@ -9,7 +8,6 @@ from funcy import compose, first

 from image_prediction.encoder.encoders.hash_encoder import HashEncoder
 from image_prediction.encoder.encoders.hash_encoder import hash_image
-from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
 from image_prediction.utils.generic import lift


@ -32,17 +30,3 @@ def test_hash_encoder(images, hashed_images, base_patch_image):
    hashed_resized = compose(first, encoder, lift(resize))([base_patch_image])
    hashed = hash_image(base_patch_image)
    assert close(hashed_resized, hashed)
-
-
-def test_all_hashes_have_length_of_twentyfive():
-    """See RED-3814: all hashes should have 25 characters."""
-    pdf_path = Path(__file__).parents[1] / "data" / "RED-3814" / "similarImages2.pdf"
-    pdf_bytes = pdf_path.read_bytes()
-    image_extractor = ParsablePDFImageExtractor()
-    image_metadata_pairs = list(image_extractor.extract(pdf_bytes))
-    images = [image for image, _ in image_metadata_pairs]
-
-    hash_encoder = HashEncoder()
-    hashes = list(hash_encoder.encode(images))
-
-    assert all(len(h) == 25 for h in hashes)