49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
import random
|
|
from itertools import starmap
|
|
from operator import __eq__
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from PIL.Image import Image
|
|
from funcy import compose, first
|
|
|
|
from image_prediction.encoder.encoders.hash_encoder import HashEncoder
|
|
from image_prediction.encoder.encoders.hash_encoder import hash_image
|
|
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
|
|
from image_prediction.utils.generic import lift
|
|
|
|
|
|
def resize(image: Image) -> Image:
    """Return ``image`` rescaled by one random factor applied to both axes.

    The factor is drawn uniformly from [0.3, 2], so the result may be smaller
    or larger than the input while keeping (approximately) its aspect ratio.

    Args:
        image: Object exposing ``size`` (width, height) and ``resize(size)``.

    Returns:
        Whatever ``image.resize`` returns for the newly computed size.
    """
    factor = random.uniform(0.3, 2)
    # Build a concrete tuple (resize is documented to take a 2-tuple, not a
    # lazy iterator) and keep every dimension >= 1 so that tiny inputs do not
    # collapse to an invalid zero-sized image after truncation.
    new_size = tuple(max(1, int(dimension * factor)) for dimension in image.size)
    return image.resize(new_size)
|
|
|
|
|
|
def close(a: str, b: str) -> bool:
    """Return whether two equal-length strings agree in at least 75% of positions.

    Args:
        a: First string.
        b: Second string; must have the same length as ``a``.

    Returns:
        True when the fraction of positions holding equal characters is
        >= 0.75. Two empty strings are considered close.

    Raises:
        AssertionError: If the strings differ in length.
    """
    assert len(a) == len(b)
    if not a:
        # Nothing to compare: avoid dividing by zero and treat the (identical)
        # empty strings as matching.
        return True
    matching_positions = sum(starmap(__eq__, zip(a, b)))
    return matching_positions / len(a) >= 0.75
|
|
|
|
|
|
@pytest.mark.xfail(reason="Stochastic test, may fail some amount of the time.")
def test_hash_encoder(images, hashed_images, base_patch_image):
    """Check the encoder against reference hashes and against a resized image.

    First, encoding ``images`` must reproduce ``hashed_images`` exactly.
    Second, the hash of a randomly resized ``base_patch_image`` must be
    ``close`` to the hash of the unmodified image. The random resize factor is
    what makes this test stochastic.
    """
    hash_encoder = HashEncoder()
    encoded = list(hash_encoder(images))
    assert encoded == hashed_images

    # Same pipeline as compose(first, encoder, lift(resize)), written out:
    # resize every image, encode the results, keep the first hash.
    resize_all = lift(resize)
    resized_hash = first(hash_encoder(resize_all([base_patch_image])))
    reference_hash = hash_image(base_patch_image)
    assert close(resized_hash, reference_hash)
|
|
|
|
|
|
def test_all_hashes_have_length_of_twentyfive():
    """Regression test for RED-3814: every produced hash has 25 characters.

    Extracts all images from the RED-3814 sample PDF, encodes them with
    ``HashEncoder`` and checks the length of each resulting hash.
    """
    sample_pdf = Path(__file__).parents[1] / "data" / "RED-3814" / "similarImages2.pdf"
    raw_pdf = sample_pdf.read_bytes()

    extractor = ParsablePDFImageExtractor()
    # The extractor yields (image, metadata) pairs; only the images are hashed.
    extracted_pairs = list(extractor.extract(raw_pdf))
    images = [image for image, _metadata in extracted_pairs]

    encoder = HashEncoder()
    hashes = list(encoder.encode(images))

    assert all(len(digest) == 25 for digest in hashes)
|