revert refactoring changes
- revert the functional refactoring changes so that the source of the error described in the ticket can be determined
- change array normalization to produce dimensionally sparse arrays, reducing memory consumption
This commit is contained in:
parent
4d43e385c5
commit
2bc9c24f6a
@ -146,22 +146,24 @@ def xref_to_image(doc, xref) -> Union[Image.Image, None]:
|
||||
# NOTE: image extraction is done via pixmap to array, as this method is twice as fast as extraction via bytestream
|
||||
try:
|
||||
pixmap = fitz.Pixmap(doc, xref)
|
||||
array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
|
||||
array = normalize_channels(array)
|
||||
array = convert_pixmap_to_array(pixmap)
|
||||
return Image.fromarray(array)
|
||||
except ValueError:
|
||||
logger.debug(f"Xref {xref} is invalid, skipping extraction ...")
|
||||
return
|
||||
|
||||
|
||||
def normalize_channels(array: np.ndarray):
|
||||
if not array.ndim == 3:
|
||||
array = np.expand_dims(array, axis=-1)
|
||||
def convert_pixmap_to_array(pixmap: fitz.fitz.Pixmap):
|
||||
array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
|
||||
array = _normalize_channels(array)
|
||||
return array
|
||||
|
||||
if array.shape[-1] == 4:
|
||||
|
||||
def _normalize_channels(array: np.ndarray):
|
||||
if array.shape[-1] == 1:
|
||||
array = array[:, :, 0]
|
||||
elif array.shape[-1] == 4:
|
||||
array = array[..., :3]
|
||||
elif array.shape[-1] == 1:
|
||||
array = np.concatenate([array, array, array], axis=-1)
|
||||
elif array.shape[-1] != 3:
|
||||
logger.warning(f"Unexpected image format: {array.shape}.")
|
||||
raise ValueError(f"Unexpected image format: {array.shape}.")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user