Integrated stitching into the parsable PDF extractor
This commit is contained in:
parent
04cf0245ed
commit
585cdf5c70
@ -10,6 +10,7 @@ from tqdm import tqdm
|
||||
|
||||
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
|
||||
from image_prediction.info import Info
|
||||
from image_prediction.stitching.stitching import stitch_pairs
|
||||
|
||||
rounder = rcompose(round, int)
|
||||
|
||||
@ -77,7 +78,10 @@ class ParsablePDFImageExtractor(ImageExtractor):
|
||||
metadata = get_metadata_for_images_on_page(page)
|
||||
get_image_infos.cache_clear()
|
||||
|
||||
return starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata)))
|
||||
image_metadata_pairs = starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata)))
|
||||
image_metadata_pairs = stitch_pairs(list(image_metadata_pairs))
|
||||
|
||||
return image_metadata_pairs
|
||||
|
||||
def extract(self, pdf: bytes, page_range: range = None):
|
||||
self.doc = fitz.Document(stream=pdf)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user