revert refactoring changes

- revert the functional refactoring changes so that the source of
the error described in the ticket can be determined
- change array normalization to keep single-channel images as 2-D arrays
(instead of expanding them to three channels) to reduce memory consumption
This commit is contained in:
Julius Unverfehrt 2023-02-13 13:53:30 +01:00
parent 4d43e385c5
commit 2bc9c24f6a

View File

@ -146,22 +146,24 @@ def xref_to_image(doc, xref) -> Union[Image.Image, None]:
# NOTE: image extraction is done via pixmap to array, as this method is twice as fast as extraction via bytestream
try:
pixmap = fitz.Pixmap(doc, xref)
array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
array = normalize_channels(array)
array = convert_pixmap_to_array(pixmap)
return Image.fromarray(array)
except ValueError:
logger.debug(f"Xref {xref} is invalid, skipping extraction ...")
return
def normalize_channels(array: np.ndarray):
if not array.ndim == 3:
array = np.expand_dims(array, axis=-1)
def convert_pixmap_to_array(pixmap: fitz.Pixmap) -> np.ndarray:
    """Convert a PyMuPDF pixmap into a channel-normalized numpy array.

    The pixmap's raw sample buffer is interpreted as unsigned bytes and
    reshaped to ``(pixmap.h, pixmap.w, pixmap.n)`` before being handed to
    ``_normalize_channels``.

    Args:
        pixmap: Pixmap whose ``samples``, ``h``, ``w`` and ``n`` attributes
            describe the pixel data.

    Returns:
        The result of ``_normalize_channels`` applied to the reshaped array.

    Raises:
        ValueError: If the sample buffer cannot be reshaped to
            ``(h, w, n)``; errors raised by ``_normalize_channels``
            propagate unchanged.
    """
    # NOTE: the annotation uses the public ``fitz.Pixmap`` name (the same one
    # the pixmap is constructed through) rather than the ``fitz.fitz``
    # submodule path, which looks like an implementation detail and is
    # presumably not available in every PyMuPDF release -- TODO confirm.
    samples = np.frombuffer(pixmap.samples, dtype=np.uint8)
    array = samples.reshape(pixmap.h, pixmap.w, pixmap.n)
    return _normalize_channels(array)
if array.shape[-1] == 4:
def _normalize_channels(array: np.ndarray):
if array.shape[-1] == 1:
array = array[:, :, 0]
elif array.shape[-1] == 4:
array = array[..., :3]
elif array.shape[-1] == 1:
array = np.concatenate([array, array, array], axis=-1)
elif array.shape[-1] != 3:
logger.warning(f"Unexpected image format: {array.shape}.")
raise ValueError(f"Unexpected image format: {array.shape}.")