37 lines
1.1 KiB
Python
37 lines
1.1 KiB
Python
from typing import Iterable
|
|
|
|
from PIL import Image
|
|
|
|
from image_prediction.encoder.encoder import Encoder
|
|
from image_prediction.utils import get_logger
|
|
|
|
logger = get_logger()
|
|
|
|
|
|
class HashEncoder(Encoder):
    """Encoder whose output for each image is a hash string."""

    def encode(self, images: Iterable[Image.Image]):
        """Lazily yield one hash per image, in input order.

        Each image goes through ``_monitored_hashing``, so an image that
        cannot be hashed yields a dummy hash instead of raising.
        """
        for image in images:
            yield _monitored_hashing(image)

    def __call__(self, images: Iterable[Image.Image], batch_size=16):
        """Yield the hashes of ``images``.

        ``batch_size`` is accepted but not used by this implementation;
        images are hashed one at a time.
        """
        for image_hash in self.encode(images):
            yield image_hash
|
|
|
|
|
|
def _monitored_hashing(image):
    """Hash ``image``, falling back to a dummy hash when hashing fails.

    Args:
        image: Image to hash; passed unchanged to ``hash_image``.

    Returns:
        The value returned by ``hash_image``, or the placeholder
        ``"F" * 25`` if ``hash_image`` raised an exception.
    """
    try:  # RED-5170: fails if image is 'broken'
        return hash_image(image)
    except Exception as err:
        # Deliberately broad (OSError is already an Exception subclass):
        # one unreadable image must not abort hashing of the remaining ones.
        # ``warning`` replaces the deprecated ``warn`` alias, which no longer
        # exists on logging.Logger in Python 3.13+.
        logger.warning(f"{err}: Couldn't hash image, generate dummy hash.")
        return "F" * 25
|
|
|
|
|
|
def hash_image(image: Image.Image):
    """Compute an average hash of ``image`` as an upper-case hex string.

    See: https://stackoverflow.com/a/49692185/3578468

    The image is resized to 10x10 and converted to mode "L" (grayscale).
    Each of the 100 pixels contributes one bit: 1 if the pixel is at least
    as bright as the mean pixel value, 0 otherwise.

    Args:
        image: The image to hash. It is not modified; ``resize`` and
            ``convert`` return new images.

    Returns:
        The bit string interpreted as a base-2 integer, rendered as
        upper-case hexadecimal and reversed. Because the hex rendering is
        not zero-padded, the result can be shorter than 25 characters when
        the leading bits are 0.
    """
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # resampling filter under its current name.
    thumbnail = image.resize((10, 10), Image.LANCZOS).convert("L")
    pixel_data = list(thumbnail.getdata())
    avg_pixel = sum(pixel_data) / len(pixel_data)
    bits = "".join("1" if px >= avg_pixel else "0" for px in pixel_data)
    # hex() prefixes "0x"; strip it, then reverse and upper-case exactly as
    # before so existing hashes stay comparable.
    return hex(int(bits, 2))[2:][::-1].upper()
|