From 585cdf5c706c524e6bfef1b3aa60526b9ef570d2 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Mon, 11 Apr 2022 13:57:10 +0200 Subject: [PATCH] integrated stitching into parsable pdf extractor --- image_prediction/image_extractor/extractors/parsable.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index d601d45..9181492 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -10,6 +10,7 @@ from tqdm import tqdm from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair from image_prediction.info import Info +from image_prediction.stitching.stitching import stitch_pairs rounder = rcompose(round, int) @@ -77,7 +78,10 @@ class ParsablePDFImageExtractor(ImageExtractor): metadata = get_metadata_for_images_on_page(page) get_image_infos.cache_clear() - return starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata))) + image_metadata_pairs = starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata))) + image_metadata_pairs = stitch_pairs(list(image_metadata_pairs)) + + return image_metadata_pairs def extract(self, pdf: bytes, page_range: range = None): self.doc = fitz.Document(stream=pdf)