Integrated stitching into parsable PDF extractor
This commit is contained in:
parent
04cf0245ed
commit
585cdf5c70
@ -10,6 +10,7 @@ from tqdm import tqdm
|
|||||||
|
|
||||||
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
|
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
|
||||||
from image_prediction.info import Info
|
from image_prediction.info import Info
|
||||||
|
from image_prediction.stitching.stitching import stitch_pairs
|
||||||
|
|
||||||
rounder = rcompose(round, int)
|
rounder = rcompose(round, int)
|
||||||
|
|
||||||
@ -77,7 +78,10 @@ class ParsablePDFImageExtractor(ImageExtractor):
|
|||||||
metadata = get_metadata_for_images_on_page(page)
|
metadata = get_metadata_for_images_on_page(page)
|
||||||
get_image_infos.cache_clear()
|
get_image_infos.cache_clear()
|
||||||
|
|
||||||
return starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata)))
|
image_metadata_pairs = starmap(ImageMetadataPair, filter(compose(all, curry(map)(truth)), zip(images, metadata)))
|
||||||
|
image_metadata_pairs = stitch_pairs(list(image_metadata_pairs))
|
||||||
|
|
||||||
|
return image_metadata_pairs
|
||||||
|
|
||||||
def extract(self, pdf: bytes, page_range: range = None):
|
def extract(self, pdf: bytes, page_range: range = None):
|
||||||
self.doc = fitz.Document(stream=pdf)
|
self.doc = fitz.Document(stream=pdf)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user