From dee1aa1f01085e585bb3126b7883bd3d01090f17 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Wed, 22 Dec 2021 10:35:13 +0100 Subject: [PATCH] RED-2440: Integrated image-service-v2 --- .../redaction-service-api-v1/pom.xml | 2 +- .../v1/server/classification/model/Page.java | 3 +- .../client/ImageClassificationClient.java | 15 - .../client/ImageClassificationResponse.java | 13 - .../controller/RedactionController.java | 6 +- .../server/parsing/PDFLinesTextStripper.java | 6 +- .../v1/server/parsing/PDFTextStripper.java | 3 +- .../v1/server/redaction/model/PdfImage.java | 1 + .../redaction/model/image/Classification.java | 14 + .../redaction/model/image/FilterGeometry.java | 10 + .../server/redaction/model/image/Filters.java | 11 + .../redaction/model/image/Geometry.java | 9 + .../redaction/model/image/ImageFormat.java | 12 + .../redaction/model/image/ImageMetadata.java | 12 + .../model/image/ImageServiceResponse.java | 15 + .../redaction/model/image/ImageSize.java | 12 + .../redaction/model/image/Position.java | 12 + .../redaction/model/image/Probability.java | 10 + .../redaction/service/AnalyzeService.java | 11 +- .../service/ImageClassificationService.java | 71 -- .../segmentation/ImageMergeService.java | 165 ----- .../v1/server/segmentation/ImageService.java | 61 ++ .../segmentation/PdfSegmentationService.java | 20 +- .../storage/RedactionStorageService.java | 2 + .../v1/server/RedactionIntegrationTest.java | 6 +- .../PdfSegmentationServiceTest.java | 56 +- .../test/resources/files/image_response.json | 686 ++++++++++++++++++ 27 files changed, 920 insertions(+), 324 deletions(-) delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationClient.java delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationResponse.java create mode 100644 
redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Classification.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/FilterGeometry.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Filters.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Geometry.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageFormat.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageMetadata.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageServiceResponse.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageSize.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Position.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Probability.java delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java create mode 100644 
redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageService.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/files/image_response.json diff --git a/redaction-service-v1/redaction-service-api-v1/pom.xml b/redaction-service-v1/redaction-service-api-v1/pom.xml index 49297b23..4036fd1d 100644 --- a/redaction-service-v1/redaction-service-api-v1/pom.xml +++ b/redaction-service-v1/redaction-service-api-v1/pom.xml @@ -12,7 +12,7 @@ redaction-service-api-v1 - 0.149.0 + 0.151.0 diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java index 42bfa82e..1bc71c23 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java @@ -7,6 +7,7 @@ import lombok.Data; import lombok.NonNull; import lombok.RequiredArgsConstructor; +import java.util.ArrayList; import java.util.List; @Data @@ -16,7 +17,7 @@ public class Page { @NonNull private List textBlocks; - private List images; + private List images = new ArrayList<>(); private Rectangle bodyTextFrame; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationClient.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationClient.java deleted file mode 100644 index 4517dd99..00000000 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationClient.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.client; - -import org.springframework.cloud.openfeign.FeignClient; -import org.springframework.http.MediaType; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestBody; -import org.springframework.web.multipart.MultipartFile; - -@FeignClient(name = "ImageClassificationResource", url = "${image-service.url}") -public interface ImageClassificationClient { - - @PostMapping(value = "/process_full_img", consumes = MediaType.MULTIPART_FORM_DATA_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) - ImageClassificationResponse classify(@RequestBody MultipartFile file); - -} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationResponse.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationResponse.java deleted file mode 100644 index 81ae0643..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationResponse.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.client; - -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.NoArgsConstructor; - -@Data -@NoArgsConstructor -@AllArgsConstructor -public class ImageClassificationResponse { - - private String category; -} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index c75eee3e..0df2fba9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -88,7 +88,7 @@ public class RedactionController implements RedactionResource { var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try { - Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream); + Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null); storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); @@ -116,7 +116,7 @@ public class RedactionController implements RedactionResource { var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try { - Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream); + Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null); storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); @@ -145,7 +145,7 @@ public class RedactionController implements RedactionResource { try { var storedObjectStream = 
redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); - classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, true); + classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null); } catch (Exception e) { throw new RedactionException(e); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java index 6e46257d..ba534a83 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java @@ -183,9 +183,9 @@ public class PDFLinesTextStripper extends PDFTextStripper { graphicsPath.clear(); break; - case OperatorName.DRAW_OBJECT: - processImageOperation(arguments); - break; +// case OperatorName.DRAW_OBJECT: +// processImageOperation(arguments); +// break; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFTextStripper.java index d6430715..cc2dce51 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFTextStripper.java @@ -389,8 +389,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine } } characterListMapping.clear(); - 
super.processPage(page); - writePage(); + super.processPage(page);writePage(); endPage(page); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java index dbb390c0..d4ed9335 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java @@ -16,6 +16,7 @@ public class PdfImage { private BufferedImage image; @NonNull private RedRectangle2D position; + @NonNull private ImageType imageType; private boolean isAppendedToParagraph; private boolean hasTransparency; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Classification.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Classification.java new file mode 100644 index 00000000..f756568f --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Classification.java @@ -0,0 +1,14 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import java.util.HashMap; +import java.util.Map; + +import lombok.Data; + +@Data +public class Classification { + + private Map probabilities = new HashMap<>(); + private String label; + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/FilterGeometry.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/FilterGeometry.java new file mode 100644 index 00000000..9fdabb52 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/FilterGeometry.java @@ -0,0 +1,10 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class FilterGeometry { + + private ImageSize imageSize; + private ImageFormat imageFormat; +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Filters.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Filters.java new file mode 100644 index 00000000..49469e34 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Filters.java @@ -0,0 +1,11 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class Filters { + + private FilterGeometry geometry; + private Probability probability; + private boolean allPassed; +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Geometry.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Geometry.java new file mode 100644 index 00000000..6ea063a2 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Geometry.java @@ -0,0 +1,9 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class Geometry { + private 
float width; + private float height; +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageFormat.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageFormat.java new file mode 100644 index 00000000..c015e583 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageFormat.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class ImageFormat { + + private float quotient; + private boolean tooTall; + private boolean tooWide; + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageMetadata.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageMetadata.java new file mode 100644 index 00000000..57a29ae3 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageMetadata.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class ImageMetadata { + + private Classification classification; + private Position position; + private Geometry geometry; + private Filters filters; +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageServiceResponse.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageServiceResponse.java new file mode 100644 index 00000000..078f58aa --- 
/dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageServiceResponse.java @@ -0,0 +1,15 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import java.util.ArrayList; +import java.util.List; + +import lombok.Data; + +@Data +public class ImageServiceResponse { + + private String dossierId; + private String fileId; + private List imageMetadata = new ArrayList<>(); + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageSize.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageSize.java new file mode 100644 index 00000000..06c04440 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/ImageSize.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class ImageSize { + + private float quotient; + private boolean tooLarge; + private boolean tooSmall; + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Position.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Position.java new file mode 100644 index 00000000..3422f6aa --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Position.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class Position { + private float x1; + private float x2; + private float y1; + private float y2; + private int pageNumber; +} 
\ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Probability.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Probability.java new file mode 100644 index 00000000..c7cf13c1 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/image/Probability.java @@ -0,0 +1,10 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model.image; + +import lombok.Data; + +@Data +public class Probability { + + private boolean unconfident; + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index 7fa2721e..c9b28fde 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -26,8 +26,10 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncre import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion; import com.iqser.red.service.redaction.v1.server.redaction.model.Image; import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities; +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; +import 
com.iqser.red.service.redaction.v1.server.segmentation.ImageService; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; @@ -53,6 +55,7 @@ public class AnalyzeService { private final SectionTextBuilderService sectionTextBuilderService; private final SectionGridCreatorService sectionGridCreatorService; private final NerAnalyserService nerAnalyserService; + private final ImageService imageService; public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) { @@ -63,9 +66,15 @@ public class AnalyzeService { Document classifiedDoc; try { + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest .getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN)); - classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream); + + Map> pdfImages = null; + if(redactionServiceSettings.isEnableImageClassification()) { + pdfImages = imageService.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + } + classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, pdfImages); pageCount = classifiedDoc.getPages().size(); } catch (Exception e) { throw new RedactionException(e); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java deleted file mode 100644 index 6a590353..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java +++ /dev/null @@ -1,71 +0,0 @@ -package 
com.iqser.red.service.redaction.v1.server.redaction.service; - -import com.iqser.red.service.redaction.v1.server.classification.model.Page; -import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient; -import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse; -import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile; -import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType; -import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; -import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.stereotype.Service; - -import javax.imageio.ImageIO; -import java.io.ByteArrayOutputStream; - -@Slf4j -@Service -@RequiredArgsConstructor -public class ImageClassificationService { - - private final ImageClassificationClient imageClassificationClient; - private final RedactionServiceSettings settings; - - - public void classifyImages(Page page) { - - page.getImages().forEach(image -> { - - if (settings.isEnableImageClassification() && !isEntirePageImage(image, page)) { - - long start = System.currentTimeMillis(); - try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - ImageIO.write(image.getImage(), "png", baos); - var mockFile = new MockMultipartFile("file", "Image.png", "image/png", baos.toByteArray()); - ImageClassificationResponse response = imageClassificationClient.classify(mockFile); - image.setImageType(ImageType.valueOf(response.getCategory())); - } catch (Exception e) { - log.error("Could not classify image", e); - image.setImageType(ImageType.OTHER); - } - log.info("Image classification took: " + (System.currentTimeMillis() - start)); - } else { - image.setImageType(ImageType.OTHER); - } - - image.getImage().flush(); - image.setImage(null); - - if (image.getImageType().equals(ImageType.OTHER)) { - 
page.getTextBlocks().forEach(textblock -> { - if (image.getPosition() - .contains(textblock.getMinX(), textblock.getMinY(), textblock.getWidth(), textblock.getHeight())) { - image.setImageType(ImageType.OCR); - } - }); - } - }); - - } - - private boolean isEntirePageImage(PdfImage image, Page page){ - double imageArea = image.getPosition().getHeight() * image.getPosition().getWidth(); - if(imageArea / page.getCropBoxArea() >= settings.getMaxImageCropboxRatio()){ - log.info("Skipping image classification because images is almost as large as the entire page"); - return true; - } - return false; - } - -} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java deleted file mode 100644 index 4c58bcd8..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java +++ /dev/null @@ -1,165 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.segmentation; - -import java.awt.Graphics; -import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import java.util.ArrayList; -import java.util.List; - -import org.springframework.stereotype.Service; - -import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; - -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -@Service -@RequiredArgsConstructor -public class ImageMergeService { - - - public List mergeImages(List images, int rotation){ - - List mergedList = processImages(images, rotation); - - List imagesInImage = new ArrayList<>(); - for(PdfImage image: mergedList){ - for (PdfImage inner: mergedList){ - if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), 
inner.getPosition().getWidth(), inner.getPosition().getHeight())){ - imagesInImage.add(inner); - } - } - } - mergedList.removeAll(imagesInImage); - - return mergedList; - } - - - //merge images, if they are separated during pdf import, return new list of Pdfimages - private List processImages(List imageList, int rotation) { - if (imageList.size() > 1) { - List mergedList = new ArrayList<>(); - int countElementsInList = 0; - boolean beginImage = true; - - // a List of Boolean, true = candidate for merging, false = no merging - List candidatesList = getCandidatesList(imageList, rotation); - - // loop through list, if there are candidates for merging (true), merge images and add it to mergedList - for (int i = 0; i < candidatesList.size(); i++) { - if (candidatesList.get(i)) { - if (beginImage) { - //begin of image, merge two parts of imageList - PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1), rotation); - // image merge successful - if (mergedImage != null) { - mergedList.add(mergedImage); - countElementsInList++; - } - } else { - //middle of an image, merge current piece auf mergedList with image of imageList - PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1), rotation); - // image merge successful - if (mergedImage != null) { - mergedList.set(countElementsInList - 1, mergedImage); - } - } - beginImage = false; - } else { - // if the last candidate is false, then both images i and i+1 must be added - if (i == candidatesList.size() - 1) { - if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) { - mergedList.add(imageList.get(i + 1)); - } else { - mergedList.add(imageList.get(i)); - mergedList.add(imageList.get(i + 1)); - } - } else { - //first image is not splitted, add i to resultlist - if (beginImage) { - mergedList.add(imageList.get(i)); - countElementsInList++; - } else { - // i is the end of an image, add begin of new image - 
mergedList.add(imageList.get(i + 1)); - countElementsInList++; - beginImage = false; - } - } - } - } - return mergedList; - } else { - return imageList; - } - } - - private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2, int rotation) { - - // diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten - double width = image1.getPosition().getWidth(); - double width2 = image2.getPosition().getWidth(); - double height1 = image1.getPosition().getHeight(); - double height2 = image2.getPosition().getHeight(); - // mit den Werten, die unter Image gespeichert sind, funktioniert es - double img1height = image1.getImage().getHeight(); - double img1width = image1.getImage().getWidth(); - double img2height = image2.getImage().getHeight(); - - BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB); - Graphics mergedImageGraphics = mergedImage.getGraphics(); - try { - mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null); - mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null); - - // set Image, Position and type for merged Image - //set position for merged image with values of image1 and the height of both - Rectangle2D pos = new Rectangle2D.Float(); - pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), rotation == 90 ? width + width2: width, rotation == 90 ? 
height1 : height1 + height2); - PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage(), image1.isHasTransparency() || image2.isHasTransparency()); - // Graphics need to be disposed - - image1.getImage().flush(); - image2.getImage().flush(); - - mergedImage.flush(); - mergedImageGraphics.dispose(); - - return newPdfImage; - } catch (Exception e) { - // failed to merge image - log.error("Failed to merge image", e); - return null; - } - - - } - - //make a list of true and false, if the image is a candidate for merging - private List getCandidatesList(List imageList, int rotation) { - List candidatesList = new ArrayList<>(); - for (int i = 0; i < imageList.size(); i++) { - if (i >= 1) { - candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i), rotation)); - } - } - return candidatesList; - } - - // evaluate if two images are candidates for merging, depending on their coordinates, width and height - private boolean isCandidateForMerging(PdfImage image1, PdfImage image2, int rotation) { - double x1 = rotation == 90 ? image1.getPosition().getY() : image1.getPosition().getX(); - double y1 = rotation == 90 ? image1.getPosition().getX() : image1.getPosition().getY(); - double width1 = rotation == 90 ? image1.getPosition().getHeight() : image1.getPosition().getWidth(); - double x2 = rotation == 90 ? image2.getPosition().getY() : image2.getPosition().getX(); - double y2 = rotation == 90 ? image2.getPosition().getX() : image2.getPosition().getY(); - double width2 = rotation == 90 ? image2.getPosition().getHeight() : image2.getPosition().getWidth(); - double height2 = rotation == 90 ? image2.getPosition().getWidth() : image2.getPosition().getHeight(); - //if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates, - // then it is the same picture and has to be merged -> return true - return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(rotation == 90 ? 
y2 - y1 : y1 - y2) && width2 > (height2 / 6); - } - -} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageService.java new file mode 100644 index 00000000..1d3882aa --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageService.java @@ -0,0 +1,61 @@ +package com.iqser.red.service.redaction.v1.server.segmentation; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import org.springframework.stereotype.Service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.redaction.v1.server.classification.model.Page; +import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType; +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; +import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D; +import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; + +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; + +@Service +@RequiredArgsConstructor +public class ImageService { + + private final ObjectMapper objectMapper; + private final RedactionStorageService redactionStorageService; + + @SneakyThrows + public Map> convertImages(String dossierId, String fileId){ + + var imageClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)); + ImageServiceResponse imageServiceResponse = 
objectMapper.readValue(imageClassificationStream, ImageServiceResponse.class); + + Map> images = new HashMap<>(); + imageServiceResponse.getImageMetadata().stream().forEach(imageMetadata -> { + var classification = imageMetadata.getFilters().isAllPassed() ? ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)) : ImageType.OTHER; + images.computeIfAbsent(imageMetadata.getPosition().getPageNumber() ,x -> new ArrayList<>()) + .add(new PdfImage(new RedRectangle2D(imageMetadata.getPosition().getX1(), imageMetadata.getPosition().getY1(), imageMetadata.getGeometry().getWidth(), imageMetadata.getGeometry().getHeight()), classification, imageMetadata.getPosition().getPageNumber())); + }); + + return images; + } + + + public void findOcr(Page page){ + page.getImages().forEach(image -> { + if (image.getImageType().equals(ImageType.OTHER)) { + page.getTextBlocks().forEach(textblock -> { + if (image.getPosition() + .contains(textblock.getMinX(), textblock.getMinY(), textblock.getWidth(), textblock.getHeight())) { + image.setImageType(ImageType.OCR); + } + }); + } + }); + } + + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index 7c577f48..a3c2ed78 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -10,6 +10,7 @@ import java.nio.file.attribute.PosixFilePermission; import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Set; import 
org.apache.commons.io.IOUtils; @@ -29,7 +30,6 @@ import com.iqser.red.service.redaction.v1.server.memory.MemoryStats; import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; -import com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; @@ -50,17 +50,10 @@ public class PdfSegmentationService { private final BlockificationService blockificationService; private final ClassificationService classificationService; private final SectionsBuilderService sectionsBuilderService; - private final ImageClassificationService imageClassificationService; - private final ImageMergeService imageMergeService; + private final ImageService imageService; - public Document parseDocument(InputStream documentInputStream) throws IOException { - - return parseDocument(documentInputStream, false); - } - - - public Document parseDocument(InputStream documentInputStream, boolean ignoreImages) throws IOException { + public Document parseDocument(InputStream documentInputStream, Map> pdfImages) throws IOException { PDDocument pdDocument = null; try { @@ -122,15 +115,14 @@ public class PdfSegmentationService { page.setLandscape(isLandscape || isRotated); page.setPageNumber(pageNumber); - List mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation); - page.setImages(mergedList); tableExtractionService.extractTables(cleanRulings, page); buildPageStatistics(page); increaseDocumentStatistics(page, document); - if (!ignoreImages) { - imageClassificationService.classifyImages(page); + if (pdfImages != 
null && pdfImages.containsKey(pageNumber)) { + page.setImages(pdfImages.get(pageNumber)); + imageService.findOcr(page); } pages.add(page); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java index fe86d1d2..b66add64 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java @@ -127,6 +127,8 @@ public class RedactionStorageService { return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension(); } + + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 1cb6e780..cee85a84 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -75,7 +75,6 @@ import com.iqser.red.service.redaction.v1.model.RedactionResult; import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; -import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient; import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; import 
com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.service.redaction.v1.server.controller.RedactionController; @@ -137,9 +136,6 @@ public class RedactionIntegrationTest { @MockBean private DictionaryClient dictionaryClient; - @MockBean - private ImageClassificationClient imageClassificationClient; - @Autowired private RedactionStorageService redactionStorageService; @@ -899,7 +895,7 @@ public class RedactionIntegrationTest { @Test public void redactionTest() throws IOException { - String fileName = "files/new/S416.pdf"; + String fileName = "files/new/Single Study - Oral (Gavage) Mouse.pdf"; String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf"; long start = System.currentTimeMillis(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java index f773d281..1e1ab146 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java @@ -1,12 +1,16 @@ package com.iqser.red.service.redaction.v1.server.segmentation; import com.amazonaws.services.s3.AmazonS3; +import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.redaction.v1.server.Application; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService; import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; +import 
com.iqser.red.service.redaction.v1.server.redaction.model.ImageType; import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; +import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D; +import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; @@ -31,13 +35,18 @@ import javax.imageio.ImageIO; import java.io.ByteArrayOutputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.stream.Collectors; import static com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils.getTemporaryDirectory; import static org.assertj.core.api.Assertions.assertThat; +import lombok.SneakyThrows; @RunWith(SpringRunner.class) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) @@ -68,42 +77,29 @@ public class PdfSegmentationServiceTest { @MockBean private LegalBasisClient legalBasisClient; + @Autowired + private ObjectMapper objectMapper; + @Configuration @EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class}) public static class TestConfiguration { } - @Test - public void testMergeImages() throws IOException { - - ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/270Rotated.pdf"); - - Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - assertThat(document.getPages().get(0).getImages().size()).isEqualTo(1); - assertThat(document.getPages().get(1).getImages().size()).isEqualTo(0); - - } @Test - @Ignore - public void testExtractImages() 
throws IOException { + @SneakyThrows + public void testMapping(){ + ClassPathResource responseJson = new ClassPathResource("files/image_response.json"); + ImageServiceResponse imageServiceResponse = objectMapper.readValue(responseJson.getInputStream(), ImageServiceResponse.class); - ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf"); + Map> images = new HashMap<>(); + imageServiceResponse.getImageMetadata().stream().forEach(imageMetadata -> { + images.computeIfAbsent(imageMetadata.getPosition().getPageNumber() ,x -> new ArrayList<>()) + .add(new PdfImage(new RedRectangle2D(imageMetadata.getPosition().getX1(), imageMetadata.getPosition().getY1(), imageMetadata.getGeometry().getWidth(), imageMetadata.getGeometry().getHeight()), ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)), imageMetadata.getPosition().getPageNumber())); + }); - Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - int i = 0; - for (Page page : document.getPages()) { - for (PdfImage image : page.getImages()) { - try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - ImageIO.write(image.getImage(), "png", baos); - try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Image " + i + ".png")) { - fileOutputStream.write(baos.toByteArray()); - } - } - i++; - } - } + System.out.println("object"); } @@ -112,7 +108,7 @@ public class PdfSegmentationServiceTest { ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf"); - Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); + Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null); assertThat(document.getParagraphs() .stream() .flatMap(paragraph -> paragraph.getTables().stream()) @@ -133,7 +129,7 @@ public class PdfSegmentationServiceTest { 
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf"); - Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); + Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null); assertThat(document.getParagraphs() .stream() .flatMap(paragraph -> paragraph.getTables().stream()) @@ -171,7 +167,7 @@ public class PdfSegmentationServiceTest { ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf"); - Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); + Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null); assertThat(document.getParagraphs() .stream() .flatMap(paragraph -> paragraph.getTables().stream()) @@ -209,7 +205,7 @@ public class PdfSegmentationServiceTest { ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf"); - Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); + Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null); assertThat(document.getParagraphs() .stream() .flatMap(paragraph -> paragraph.getTables().stream()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/image_response.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/image_response.json new file mode 100644 index 00000000..75279536 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/image_response.json @@ -0,0 +1,686 @@ +{ + "dossierId": "f889853e-4bf8-49a9-aae5-c38605c6ef40", + "fileId": "22ef63e29bb2a27db8497272336f6b32", + "imageMetadata": [ + { + "classification": { + "probabilities": { + "logo": 1.0, + "signature": 0.0, + "other": 0.0, + "formula": 0.0 + }, + "label": "logo" + }, + "position": { + 
"x1": 89.88, + "x2": 274.20000000000005, + "y1": 716.24, + "y2": 770.0, + "pageNumber": 1 + }, + "geometry": { + "width": 184.32000000000005, + "height": 53.75999999999999 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.14298074612038092, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 3.42857142857143, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "signature": 0.999968, + "logo": 1.6e-05, + "other": 1.6e-05, + "formula": 0.0 + }, + "label": "signature" + }, + "position": { + "x1": -0.10000600000000001, + "x2": 595.099994, + "y1": -0.07998660000000002, + "y2": 842.0800134, + "pageNumber": 3 + }, + "geometry": { + "width": 595.2, + "height": 842.16 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 1.0000782051152328, + "tooLarge": true, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.706754060986036, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": false + } + }, + { + "classification": { + "probabilities": { + "signature": 0.999872, + "other": 7.9e-05, + "logo": 4.8e-05, + "formula": 0.0 + }, + "label": "signature" + }, + "position": { + "x1": -0.10000600000000001, + "x2": 595.099994, + "y1": -0.07998660000000002, + "y2": 842.0800134, + "pageNumber": 7 + }, + "geometry": { + "width": 595.2, + "height": 842.16 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 1.0000782051152328, + "tooLarge": true, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.706754060986036, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": false + } + }, + { + "classification": { + "probabilities": { + "signature": 0.996366, + "other": 0.003607, + "logo": 2.3e-05, + "formula": 4e-06 + }, + "label": "signature" + }, + "position": { + "x1": 
-0.10000600000000001, + "x2": 595.099994, + "y1": -0.07998660000000002, + "y2": 842.0800134, + "pageNumber": 8 + }, + "geometry": { + "width": 595.2, + "height": 842.16 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 1.0002630764355351, + "tooLarge": true, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.706754060986036, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": false + } + }, + { + "classification": { + "probabilities": { + "signature": 0.999772, + "logo": 0.000131, + "other": 9.7e-05, + "formula": 0.0 + }, + "label": "signature" + }, + "position": { + "x1": 82.59443842482001, + "x2": 512.6365568843402, + "y1": 116.943736387567, + "y2": 725.0718450317352, + "pageNumber": 73 + }, + "geometry": { + "width": 430.04211845952017, + "height": 608.1281086441682 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.72236755521117, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.7071571143427432, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "signature": 1.0, + "logo": 0.0, + "other": 0.0, + "formula": 0.0 + }, + "label": "signature" + }, + "position": { + "x1": 328.20483600000006, + "x2": 393.94460940000005, + "y1": 175.1643178, + "y2": 203.92865619999998, + "pageNumber": 81 + }, + "geometry": { + "width": 65.73977339999999, + "height": 28.764338399999986 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.06142518774572455, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 2.2854609929078022, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "signature": 1.0, + "logo": 0.0, + "other": 0.0, + "formula": 0.0 + }, + "label": "signature" + 
}, + "position": { + "x1": 136.5955818, + "x2": 224.72461859999999, + "y1": 175.1133172, + "y2": 203.97965680000001, + "pageNumber": 81 + }, + "geometry": { + "width": 88.1290368, + "height": 28.866339600000003 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.07124601312700823, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 3.053003533568904, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "logo": 1.0, + "signature": 0.0, + "other": 0.0, + "formula": 0.0 + }, + "label": "logo" + }, + "position": { + "x1": 194.99126880000003, + "x2": 399.80967840000005, + "y1": 554.6597824, + "y2": 686.2413304, + "pageNumber": 81 + }, + "geometry": { + "width": 204.81840960000002, + "height": 131.581548 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.23189275858788796, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 1.5565891472868219, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "other": 1.0, + "signature": 0.0, + "formula": 0.0, + "logo": 0.0 + }, + "label": "other" + }, + "position": { + "x1": 133.9945512, + "x2": 242.52382799999998, + "y1": 411.24609519999996, + "y2": 523.2434128, + "pageNumber": 90 + }, + "geometry": { + "width": 108.52927679999999, + "height": 111.99731760000003 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.15573364968831904, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.9690346083788703, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "other": 1.0, + "signature": 0.0, + "formula": 0.0, + "logo": 0.0 + }, + "label": "other" + }, + 
"position": { + "x1": 133.5865464, + "x2": 242.3198256, + "y1": 274.972492, + "y2": 387.7858192, + "pageNumber": 90 + }, + "geometry": { + "width": 108.7332792, + "height": 112.8133272 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.15644678522591335, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.9638336347197106, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "other": 1.0, + "signature": 0.0, + "formula": 0.0, + "logo": 0.0 + }, + "label": "other" + }, + "position": { + "x1": 246.19587120000003, + "x2": 356.5611696, + "y1": 400.84197279999995, + "y2": 519.3673672, + "pageNumber": 90 + }, + "geometry": { + "width": 110.3652984, + "height": 118.52539440000004 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.1615575178049721, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.9311531841652321, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "other": 1.0, + "signature": 0.0, + "formula": 0.0, + "logo": 0.0 + }, + "label": "other" + }, + "position": { + "x1": 244.9718568, + "x2": 358.3971912, + "y1": 274.972492, + "y2": 387.7858192, + "pageNumber": 90 + }, + "geometry": { + "width": 113.4253344, + "height": 112.8133272 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.15978662903260646, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 1.0054249547920433, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "other": 1.0, + "signature": 0.0, + "formula": 0.0, + "logo": 0.0 + }, + "label": "other" + }, + "position": { + "x1": 254.9679744, + "x2": 
371.6573472, + "y1": 439.6024288, + "y2": 564.0438928, + "pageNumber": 91 + }, + "geometry": { + "width": 116.6893728, + "height": 124.441464 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.17021718544102565, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.9377049180327869, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "other": 1.0, + "signature": 0.0, + "formula": 0.0, + "logo": 0.0 + }, + "label": "other" + }, + "position": { + "x1": 133.9945512, + "x2": 249.663912, + "y1": 443.07046959999997, + "y2": 687.2613424, + "pageNumber": 91 + }, + "geometry": { + "width": 115.66936080000002, + "height": 244.19087280000002 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.23739910530627284, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.4736842105263158, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "other": 1.0, + "signature": 0.0, + "formula": 0.0, + "logo": 0.0 + }, + "label": "other" + }, + "position": { + "x1": 105.84222, + "x2": 374.870385, + "y1": 526.40545, + "y2": 687.05734, + "pageNumber": 92 + }, + "geometry": { + "width": 269.028165, + "height": 160.65188999999998 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.2936614851112628, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 1.6746031746031749, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "probabilities": { + "logo": 0.788068, + "other": 0.152259, + "formula": 0.036883, + "signature": 0.02279 + }, + "label": "logo" + }, + "position": { + "x1": 44.64999049990001, + "x2": 550.5759424999001, + "y1": 
63.286004150029996, + "y2": 778.72242095003, + "pageNumber": 94 + }, + "geometry": { + "width": 505.92595200000005, + "height": 715.4364168000001 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.8498341845521462, + "tooLarge": true, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.7071571143427431, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": false + } + }, + { + "classification": { + "probabilities": { + "signature": 0.998335, + "logo": 0.000955, + "other": 0.000703, + "formula": 7e-06 + }, + "label": "signature" + }, + "position": { + "x1": 58.954005540029996, + "x2": 536.45979618003, + "y1": 83.94401504006001, + "y2": 758.05854296006, + "pageNumber": 95 + }, + "geometry": { + "width": 477.50579064000004, + "height": 674.11452792 + }, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.8014221863697041, + "tooLarge": true, + "tooSmall": false + }, + "imageFormat": { + "quotient": 0.7083452007974936, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": false + } + } + ] +}