Integrated stitching into the parsable PDF extractor

This commit is contained in:
Matthias Bisping 2022-04-11 13:57:10 +02:00
parent 04cf0245ed
commit 585cdf5c70

View File

@@ -10,6 +10,7 @@ from tqdm import tqdm
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
from image_prediction.info import Info
from image_prediction.stitching.stitching import stitch_pairs
rounder = rcompose(round, int)
@@ -77,7 +78,10 @@ class ParsablePDFImageExtractor(ImageExtractor):
metadata = get_metadata_for_images_on_page(page)
get_image_infos.cache_clear()
return starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata)))
image_metadata_pairs = starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata)))
image_metadata_pairs = stitch_pairs(list(image_metadata_pairs))
return image_metadata_pairs
def extract(self, pdf: bytes, page_range: range = None):
self.doc = fitz.Document(stream=pdf)