add support for broken images to hash encoding

This commit is contained in:
Julius Unverfehrt 2022-08-30 14:48:08 +02:00
parent afaa0aefee
commit 3759bda2da
2 changed files with 14 additions and 2 deletions

View File

@ -3,16 +3,28 @@ from typing import Iterable
from PIL import Image from PIL import Image
from image_prediction.encoder.encoder import Encoder from image_prediction.encoder.encoder import Encoder
from image_prediction.utils import get_logger
logger = get_logger()
class HashEncoder(Encoder):
    """Encoder that represents every image by a hash."""

    def encode(self, images: Iterable[Image.Image]):
        """Lazily yield one hash per image, in input order.

        Hashing goes through ``_monitored_hashing``, so an image that cannot
        be hashed yields a dummy hash instead of raising.
        """
        yield from map(_monitored_hashing, images)

    def __call__(self, images: Iterable[Image.Image], batch_size=16):
        """Yield the hashes of ``images``; delegates to ``encode``.

        ``batch_size`` is not used by this implementation
        (presumably kept for signature compatibility with other encoders;
        confirm against the ``Encoder`` base class).
        """
        yield from self.encode(images)
def _monitored_hashing(image):
    """Hash ``image``, falling back to a dummy hash when hashing fails.

    RED-5170: hashing fails if the image is 'broken'. Instead of letting the
    error propagate, it is logged as a warning and a placeholder value is
    returned, so the caller still receives one hash per input image.

    Returns:
        The result of ``hash_image(image)``, or the string ``"F" * 25`` if
        hashing raised. (The placeholder length presumably matches the length
        of a real hash; confirm against ``hash_image``.)
    """
    try:
        return hash_image(image)
    # ``OSError`` is a subclass of ``Exception``, so catching ``Exception``
    # alone covers exactly the same errors as ``(OSError, Exception)``.
    except Exception as err:
        # ``warning`` replaces the deprecated ``warn`` alias.
        logger.warning(f"{err}: Couldn't hash image, generate dummy hash.")
        return "F" * 25
def hash_image(image: Image.Image): def hash_image(image: Image.Image):
"""See: https://stackoverflow.com/a/49692185/3578468""" """See: https://stackoverflow.com/a/49692185/3578468"""
image = image.resize((10, 10), Image.ANTIALIAS) image = image.resize((10, 10), Image.ANTIALIAS)

View File

@ -190,7 +190,7 @@ def concat_images(im1: Image, im2: Image, metadata: dict, axis):
try: # RED-5170: fails if image is 'broken' try: # RED-5170: fails if image is 'broken'
im_aggr.paste(im, box=box) im_aggr.paste(im, box=box)
except Exception as err: except (OSError, Exception) as err:
logger.warn( logger.warn(
f"{err}: Couldn't merge image, replace broken part by blank image and passthrough. (page: {metadata[Info.PAGE_IDX]})" f"{err}: Couldn't merge image, replace broken part by blank image and passthrough. (page: {metadata[Info.PAGE_IDX]})"
) )