RED-2440: Integrated image-service-v2
This commit is contained in:
parent
db16f8c1da
commit
dee1aa1f01
@ -12,7 +12,7 @@
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
|
||||
<properties>
|
||||
<persistence-service.version>0.149.0</persistence-service.version>
|
||||
<persistence-service.version>0.151.0</persistence-service.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@ -7,6 +7,7 @@ import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@ -16,7 +17,7 @@ public class Page {
|
||||
@NonNull
|
||||
private List<AbstractTextContainer> textBlocks;
|
||||
|
||||
private List<PdfImage> images;
|
||||
private List<PdfImage> images = new ArrayList<>();
|
||||
|
||||
private Rectangle bodyTextFrame;
|
||||
|
||||
|
||||
@ -1,15 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client;
|
||||
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
/**
 * Feign client registered under the name "ImageClassificationResource"; its base URL is
 * taken from the {@code image-service.url} property.
 */
@FeignClient(name = "ImageClassificationResource", url = "${image-service.url}")
|
||||
public interface ImageClassificationClient {
|
||||
|
||||
/**
 * Posts a single file as multipart form data to {@code /process_full_img} and maps the
 * JSON reply onto an {@link ImageClassificationResponse}.
 *
 * @param file the file to send as the request body
 * @return the response deserialized from the JSON reply
 */
@PostMapping(value = "/process_full_img", consumes = MediaType.MULTIPART_FORM_DATA_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
ImageClassificationResponse classify(@RequestBody MultipartFile file);
|
||||
|
||||
}
|
||||
@ -1,13 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Reply payload of {@code ImageClassificationClient#classify}; Lombok generates accessors
 * plus a no-args and an all-args constructor.
 */
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ImageClassificationResponse {
|
||||
|
||||
// Category name; ImageClassificationService passes it to ImageType.valueOf(...), so it has to
// match an ImageType constant name exactly.
private String category;
|
||||
}
|
||||
@ -88,7 +88,7 @@ public class RedactionController implements RedactionResource {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
|
||||
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
|
||||
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
@ -116,7 +116,7 @@ public class RedactionController implements RedactionResource {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
|
||||
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
|
||||
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
@ -145,7 +145,7 @@ public class RedactionController implements RedactionResource {
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
|
||||
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, true);
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
|
||||
@ -183,9 +183,9 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
|
||||
case OperatorName.DRAW_OBJECT:
|
||||
processImageOperation(arguments);
|
||||
break;
|
||||
// case OperatorName.DRAW_OBJECT:
|
||||
// processImageOperation(arguments);
|
||||
// break;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -389,8 +389,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine
|
||||
}
|
||||
}
|
||||
characterListMapping.clear();
|
||||
super.processPage(page);
|
||||
writePage();
|
||||
super.processPage(page);writePage();
|
||||
endPage(page);
|
||||
}
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@ public class PdfImage {
|
||||
private BufferedImage image;
|
||||
@NonNull
|
||||
private RedRectangle2D position;
|
||||
@NonNull
|
||||
private ImageType imageType;
|
||||
private boolean isAppendedToParagraph;
|
||||
private boolean hasTransparency;
|
||||
|
||||
@ -0,0 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Classification part of one {@link ImageMetadata} entry: a label plus a map of
 * probabilities.
 */
@Data
|
||||
public class Classification {
|
||||
|
||||
// Float values keyed by String; presumably one probability per candidate label - TODO confirm
// against the image-service contract.
private Map<String, Float> probabilities = new HashMap<>();
|
||||
// ImageService upper-cases this value (Locale.ROOT) and resolves it via ImageType.valueOf(...).
private String label;
|
||||
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Geometry-related filter results of an image, grouped into a size part and a format part.
 * Referenced from {@link Filters#getGeometry()}.
 */
@Data
|
||||
public class FilterGeometry {
|
||||
|
||||
// Size check result (quotient, tooLarge, tooSmall).
private ImageSize imageSize;
|
||||
// Format check result (quotient, tooTall, tooWide).
private ImageFormat imageFormat;
|
||||
}
|
||||
@ -0,0 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Filter results attached to one {@link ImageMetadata} entry.
 */
@Data
|
||||
public class Filters {
|
||||
|
||||
// Geometry-related filter results (image size and image format).
private FilterGeometry geometry;
|
||||
// Probability-related filter result.
private Probability probability;
|
||||
// ImageService only trusts the classification label when this flag is true; otherwise the
// image is typed as ImageType.OTHER.
private boolean allPassed;
|
||||
}
|
||||
@ -0,0 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Width and height of an image. ImageService uses both values as the size of the
 * RedRectangle2D it builds for the resulting PdfImage.
 */
@Data
|
||||
public class Geometry {
|
||||
// NOTE(review): unit is not visible here; presumably the same unit as Position - confirm.
private float width;
|
||||
private float height;
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Format part of {@link FilterGeometry}: a quotient plus two flags.
 */
@Data
|
||||
public class ImageFormat {
|
||||
|
||||
// NOTE(review): presumably the width/height ratio the two flags are derived from - confirm
// against the image-service contract.
private float quotient;
|
||||
private boolean tooTall;
|
||||
private boolean tooWide;
|
||||
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * One image entry of an {@link ImageServiceResponse}. ImageService turns each entry into a
 * PdfImage using the classification label, the position, the geometry and the filter result.
 */
@Data
|
||||
public class ImageMetadata {
|
||||
|
||||
// Label and probabilities assigned to the image.
private Classification classification;
|
||||
// Coordinates and page number of the image.
private Position position;
|
||||
// Width and height of the image.
private Geometry geometry;
|
||||
// Filter results; filters.allPassed decides whether the label is used.
private Filters filters;
|
||||
}
|
||||
@ -0,0 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Top-level image information for one file. ImageService reads it with Jackson's
 * ObjectMapper from the stored object of type FileType.IMAGE_INFO.
 */
@Data
|
||||
public class ImageServiceResponse {
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
// One entry per image; initialized to an empty list so callers can iterate without a null
// check when the field is absent from the JSON.
private List<ImageMetadata> imageMetadata = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Size part of {@link FilterGeometry}: a quotient plus two flags.
 */
@Data
|
||||
public class ImageSize {
|
||||
|
||||
// NOTE(review): presumably a size ratio (e.g. image relative to page) the two flags are
// derived from - confirm against the image-service contract.
private float quotient;
|
||||
private boolean tooLarge;
|
||||
private boolean tooSmall;
|
||||
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Location of an image: two x values, two y values and a page number. ImageService uses
 * (x1, y1) as the origin of the RedRectangle2D it builds and pageNumber as the key of the
 * per-page image map; x2 and y2 are not read there.
 */
@Data
|
||||
public class Position {
|
||||
private float x1;
|
||||
private float x2;
|
||||
private float y1;
|
||||
private float y2;
|
||||
// NOTE(review): compared against the page number used by PdfSegmentationService; whether it
// is 0- or 1-based is not visible here - confirm.
private int pageNumber;
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Probability part of {@link Filters}, consisting of a single flag.
 */
@Data
|
||||
public class Probability {
|
||||
|
||||
// NOTE(review): presumably true when the classifier's confidence was too low - confirm
// against the image-service contract.
private boolean unconfident;
|
||||
|
||||
}
|
||||
@ -26,8 +26,10 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncre
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.ImageService;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
@ -53,6 +55,7 @@ public class AnalyzeService {
|
||||
private final SectionTextBuilderService sectionTextBuilderService;
|
||||
private final SectionGridCreatorService sectionGridCreatorService;
|
||||
private final NerAnalyserService nerAnalyserService;
|
||||
private final ImageService imageService;
|
||||
|
||||
|
||||
public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
|
||||
@ -63,9 +66,15 @@ public class AnalyzeService {
|
||||
Document classifiedDoc;
|
||||
|
||||
try {
|
||||
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest
|
||||
.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
|
||||
Map<Integer, List<PdfImage>> pdfImages = null;
|
||||
if(redactionServiceSettings.isEnableImageClassification()) {
|
||||
pdfImages = imageService.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
}
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, pdfImages);
|
||||
pageCount = classifiedDoc.getPages().size();
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
|
||||
@ -1,71 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ImageClassificationService {
|
||||
|
||||
private final ImageClassificationClient imageClassificationClient;
|
||||
private final RedactionServiceSettings settings;
|
||||
|
||||
|
||||
public void classifyImages(Page page) {
|
||||
|
||||
page.getImages().forEach(image -> {
|
||||
|
||||
if (settings.isEnableImageClassification() && !isEntirePageImage(image, page)) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
ImageIO.write(image.getImage(), "png", baos);
|
||||
var mockFile = new MockMultipartFile("file", "Image.png", "image/png", baos.toByteArray());
|
||||
ImageClassificationResponse response = imageClassificationClient.classify(mockFile);
|
||||
image.setImageType(ImageType.valueOf(response.getCategory()));
|
||||
} catch (Exception e) {
|
||||
log.error("Could not classify image", e);
|
||||
image.setImageType(ImageType.OTHER);
|
||||
}
|
||||
log.info("Image classification took: " + (System.currentTimeMillis() - start));
|
||||
} else {
|
||||
image.setImageType(ImageType.OTHER);
|
||||
}
|
||||
|
||||
image.getImage().flush();
|
||||
image.setImage(null);
|
||||
|
||||
if (image.getImageType().equals(ImageType.OTHER)) {
|
||||
page.getTextBlocks().forEach(textblock -> {
|
||||
if (image.getPosition()
|
||||
.contains(textblock.getMinX(), textblock.getMinY(), textblock.getWidth(), textblock.getHeight())) {
|
||||
image.setImageType(ImageType.OCR);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private boolean isEntirePageImage(PdfImage image, Page page){
|
||||
double imageArea = image.getPosition().getHeight() * image.getPosition().getWidth();
|
||||
if(imageArea / page.getCropBoxArea() >= settings.getMaxImageCropboxRatio()){
|
||||
log.info("Skipping image classification because images is almost as large as the entire page");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,165 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import java.awt.Graphics;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ImageMergeService {
|
||||
|
||||
|
||||
public List<PdfImage> mergeImages(List<PdfImage> images, int rotation){
|
||||
|
||||
List<PdfImage> mergedList = processImages(images, rotation);
|
||||
|
||||
List<PdfImage> imagesInImage = new ArrayList<>();
|
||||
for(PdfImage image: mergedList){
|
||||
for (PdfImage inner: mergedList){
|
||||
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
|
||||
imagesInImage.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
mergedList.removeAll(imagesInImage);
|
||||
|
||||
return mergedList;
|
||||
}
|
||||
|
||||
|
||||
//merge images, if they are separated during pdf import, return new list of Pdfimages
|
||||
private List<PdfImage> processImages(List<PdfImage> imageList, int rotation) {
|
||||
if (imageList.size() > 1) {
|
||||
List<PdfImage> mergedList = new ArrayList<>();
|
||||
int countElementsInList = 0;
|
||||
boolean beginImage = true;
|
||||
|
||||
// a List of Boolean, true = candidate for merging, false = no merging
|
||||
List<Boolean> candidatesList = getCandidatesList(imageList, rotation);
|
||||
|
||||
// loop through list, if there are candidates for merging (true), merge images and add it to mergedList
|
||||
for (int i = 0; i < candidatesList.size(); i++) {
|
||||
if (candidatesList.get(i)) {
|
||||
if (beginImage) {
|
||||
//begin of image, merge two parts of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1), rotation);
|
||||
// image merge successful
|
||||
if (mergedImage != null) {
|
||||
mergedList.add(mergedImage);
|
||||
countElementsInList++;
|
||||
}
|
||||
} else {
|
||||
//middle of an image, merge current piece auf mergedList with image of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1), rotation);
|
||||
// image merge successful
|
||||
if (mergedImage != null) {
|
||||
mergedList.set(countElementsInList - 1, mergedImage);
|
||||
}
|
||||
}
|
||||
beginImage = false;
|
||||
} else {
|
||||
// if the last candidate is false, then both images i and i+1 must be added
|
||||
if (i == candidatesList.size() - 1) {
|
||||
if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) {
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
} else {
|
||||
mergedList.add(imageList.get(i));
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
}
|
||||
} else {
|
||||
//first image is not splitted, add i to resultlist
|
||||
if (beginImage) {
|
||||
mergedList.add(imageList.get(i));
|
||||
countElementsInList++;
|
||||
} else {
|
||||
// i is the end of an image, add begin of new image
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
countElementsInList++;
|
||||
beginImage = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return mergedList;
|
||||
} else {
|
||||
return imageList;
|
||||
}
|
||||
}
|
||||
|
||||
private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2, int rotation) {
|
||||
|
||||
// diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten
|
||||
double width = image1.getPosition().getWidth();
|
||||
double width2 = image2.getPosition().getWidth();
|
||||
double height1 = image1.getPosition().getHeight();
|
||||
double height2 = image2.getPosition().getHeight();
|
||||
// mit den Werten, die unter Image gespeichert sind, funktioniert es
|
||||
double img1height = image1.getImage().getHeight();
|
||||
double img1width = image1.getImage().getWidth();
|
||||
double img2height = image2.getImage().getHeight();
|
||||
|
||||
BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB);
|
||||
Graphics mergedImageGraphics = mergedImage.getGraphics();
|
||||
try {
|
||||
mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null);
|
||||
mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null);
|
||||
|
||||
// set Image, Position and type for merged Image
|
||||
//set position for merged image with values of image1 and the height of both
|
||||
Rectangle2D pos = new Rectangle2D.Float();
|
||||
pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), rotation == 90 ? width + width2: width, rotation == 90 ? height1 : height1 + height2);
|
||||
PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage(), image1.isHasTransparency() || image2.isHasTransparency());
|
||||
// Graphics need to be disposed
|
||||
|
||||
image1.getImage().flush();
|
||||
image2.getImage().flush();
|
||||
|
||||
mergedImage.flush();
|
||||
mergedImageGraphics.dispose();
|
||||
|
||||
return newPdfImage;
|
||||
} catch (Exception e) {
|
||||
// failed to merge image
|
||||
log.error("Failed to merge image", e);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//make a list of true and false, if the image is a candidate for merging
|
||||
private List<Boolean> getCandidatesList(List<PdfImage> imageList, int rotation) {
|
||||
List<Boolean> candidatesList = new ArrayList<>();
|
||||
for (int i = 0; i < imageList.size(); i++) {
|
||||
if (i >= 1) {
|
||||
candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i), rotation));
|
||||
}
|
||||
}
|
||||
return candidatesList;
|
||||
}
|
||||
|
||||
// evaluate if two images are candidates for merging, depending on their coordinates, width and height
|
||||
private boolean isCandidateForMerging(PdfImage image1, PdfImage image2, int rotation) {
|
||||
double x1 = rotation == 90 ? image1.getPosition().getY() : image1.getPosition().getX();
|
||||
double y1 = rotation == 90 ? image1.getPosition().getX() : image1.getPosition().getY();
|
||||
double width1 = rotation == 90 ? image1.getPosition().getHeight() : image1.getPosition().getWidth();
|
||||
double x2 = rotation == 90 ? image2.getPosition().getY() : image2.getPosition().getX();
|
||||
double y2 = rotation == 90 ? image2.getPosition().getX() : image2.getPosition().getY();
|
||||
double width2 = rotation == 90 ? image2.getPosition().getHeight() : image2.getPosition().getWidth();
|
||||
double height2 = rotation == 90 ? image2.getPosition().getWidth() : image2.getPosition().getHeight();
|
||||
//if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates,
|
||||
// then it is the same picture and has to be merged -> return true
|
||||
return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(rotation == 90 ? y2 - y1 : y1 - y2) && width2 > (height2 / 6);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,61 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ImageService {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
|
||||
@SneakyThrows
|
||||
public Map<Integer, List<PdfImage>> convertImages(String dossierId, String fileId){
|
||||
|
||||
var imageClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO));
|
||||
ImageServiceResponse imageServiceResponse = objectMapper.readValue(imageClassificationStream, ImageServiceResponse.class);
|
||||
|
||||
Map<Integer, List<PdfImage>> images = new HashMap<>();
|
||||
imageServiceResponse.getImageMetadata().stream().forEach(imageMetadata -> {
|
||||
var classification = imageMetadata.getFilters().isAllPassed() ? ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)) : ImageType.OTHER;
|
||||
images.computeIfAbsent(imageMetadata.getPosition().getPageNumber() ,x -> new ArrayList<>())
|
||||
.add(new PdfImage(new RedRectangle2D(imageMetadata.getPosition().getX1(), imageMetadata.getPosition().getY1(), imageMetadata.getGeometry().getWidth(), imageMetadata.getGeometry().getHeight()), classification, imageMetadata.getPosition().getPageNumber()));
|
||||
});
|
||||
|
||||
return images;
|
||||
}
|
||||
|
||||
|
||||
public void findOcr(Page page){
|
||||
page.getImages().forEach(image -> {
|
||||
if (image.getImageType().equals(ImageType.OTHER)) {
|
||||
page.getTextBlocks().forEach(textblock -> {
|
||||
if (image.getPosition()
|
||||
.contains(textblock.getMinX(), textblock.getMinY(), textblock.getWidth(), textblock.getHeight())) {
|
||||
image.setImageType(ImageType.OCR);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -10,6 +10,7 @@ import java.nio.file.attribute.PosixFilePermission;
|
||||
import java.nio.file.attribute.PosixFilePermissions;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
@ -29,7 +30,6 @@ import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
@ -50,17 +50,10 @@ public class PdfSegmentationService {
|
||||
private final BlockificationService blockificationService;
|
||||
private final ClassificationService classificationService;
|
||||
private final SectionsBuilderService sectionsBuilderService;
|
||||
private final ImageClassificationService imageClassificationService;
|
||||
private final ImageMergeService imageMergeService;
|
||||
private final ImageService imageService;
|
||||
|
||||
|
||||
public Document parseDocument(InputStream documentInputStream) throws IOException {
|
||||
|
||||
return parseDocument(documentInputStream, false);
|
||||
}
|
||||
|
||||
|
||||
public Document parseDocument(InputStream documentInputStream, boolean ignoreImages) throws IOException {
|
||||
public Document parseDocument(InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
|
||||
|
||||
PDDocument pdDocument = null;
|
||||
try {
|
||||
@ -122,15 +115,14 @@ public class PdfSegmentationService {
|
||||
page.setLandscape(isLandscape || isRotated);
|
||||
page.setPageNumber(pageNumber);
|
||||
|
||||
List<PdfImage> mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation);
|
||||
page.setImages(mergedList);
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
buildPageStatistics(page);
|
||||
increaseDocumentStatistics(page, document);
|
||||
|
||||
if (!ignoreImages) {
|
||||
imageClassificationService.classifyImages(page);
|
||||
if (pdfImages != null && pdfImages.containsKey(pageNumber)) {
|
||||
page.setImages(pdfImages.get(pageNumber));
|
||||
imageService.findOcr(page);
|
||||
}
|
||||
|
||||
pages.add(page);
|
||||
|
||||
@ -127,6 +127,8 @@ public class RedactionStorageService {
|
||||
return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -75,7 +75,6 @@ import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
@ -137,9 +136,6 @@ public class RedactionIntegrationTest {
|
||||
@MockBean
|
||||
private DictionaryClient dictionaryClient;
|
||||
|
||||
@MockBean
|
||||
private ImageClassificationClient imageClassificationClient;
|
||||
|
||||
@Autowired
|
||||
private RedactionStorageService redactionStorageService;
|
||||
|
||||
@ -899,7 +895,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void redactionTest() throws IOException {
|
||||
|
||||
String fileName = "files/new/S416.pdf";
|
||||
String fileName = "files/new/Single Study - Oral (Gavage) Mouse.pdf";
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
@ -1,12 +1,16 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.Application;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
@ -31,13 +35,18 @@ import javax.imageio.ImageIO;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils.getTemporaryDirectory;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@ -68,42 +77,29 @@ public class PdfSegmentationServiceTest {
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
@Autowired
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class})
|
||||
public static class TestConfiguration {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeImages() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/270Rotated.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
assertThat(document.getPages().get(0).getImages().size()).isEqualTo(1);
|
||||
assertThat(document.getPages().get(1).getImages().size()).isEqualTo(0);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void testExtractImages() throws IOException {
|
||||
@SneakyThrows
|
||||
public void testMapping(){
|
||||
ClassPathResource responseJson = new ClassPathResource("files/image_response.json");
|
||||
ImageServiceResponse imageServiceResponse = objectMapper.readValue(responseJson.getInputStream(), ImageServiceResponse.class);
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
|
||||
Map<Integer, List<PdfImage>> images = new HashMap<>();
|
||||
imageServiceResponse.getImageMetadata().stream().forEach(imageMetadata -> {
|
||||
images.computeIfAbsent(imageMetadata.getPosition().getPageNumber() ,x -> new ArrayList<>())
|
||||
.add(new PdfImage(new RedRectangle2D(imageMetadata.getPosition().getX1(), imageMetadata.getPosition().getY1(), imageMetadata.getGeometry().getWidth(), imageMetadata.getGeometry().getHeight()), ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)), imageMetadata.getPosition().getPageNumber()));
|
||||
});
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
int i = 0;
|
||||
for (Page page : document.getPages()) {
|
||||
for (PdfImage image : page.getImages()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
ImageIO.write(image.getImage(), "png", baos);
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Image " + i + ".png")) {
|
||||
fileOutputStream.write(baos.toByteArray());
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
System.out.println("object");
|
||||
}
|
||||
|
||||
|
||||
@ -112,7 +108,7 @@ public class PdfSegmentationServiceTest {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
@ -133,7 +129,7 @@ public class PdfSegmentationServiceTest {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
@ -171,7 +167,7 @@ public class PdfSegmentationServiceTest {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
@ -209,7 +205,7 @@ public class PdfSegmentationServiceTest {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
|
||||
@ -0,0 +1,686 @@
|
||||
{
|
||||
"dossierId": "f889853e-4bf8-49a9-aae5-c38605c6ef40",
|
||||
"fileId": "22ef63e29bb2a27db8497272336f6b32",
|
||||
"imageMetadata": [
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"logo": 1.0,
|
||||
"signature": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "logo"
|
||||
},
|
||||
"position": {
|
||||
"x1": 89.88,
|
||||
"x2": 274.20000000000005,
|
||||
"y1": 716.24,
|
||||
"y2": 770.0,
|
||||
"pageNumber": 1
|
||||
},
|
||||
"geometry": {
|
||||
"width": 184.32000000000005,
|
||||
"height": 53.75999999999999
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.14298074612038092,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 3.42857142857143,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.999968,
|
||||
"logo": 1.6e-05,
|
||||
"other": 1.6e-05,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": -0.10000600000000001,
|
||||
"x2": 595.099994,
|
||||
"y1": -0.07998660000000002,
|
||||
"y2": 842.0800134,
|
||||
"pageNumber": 3
|
||||
},
|
||||
"geometry": {
|
||||
"width": 595.2,
|
||||
"height": 842.16
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 1.0000782051152328,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.706754060986036,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.999872,
|
||||
"other": 7.9e-05,
|
||||
"logo": 4.8e-05,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": -0.10000600000000001,
|
||||
"x2": 595.099994,
|
||||
"y1": -0.07998660000000002,
|
||||
"y2": 842.0800134,
|
||||
"pageNumber": 7
|
||||
},
|
||||
"geometry": {
|
||||
"width": 595.2,
|
||||
"height": 842.16
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 1.0000782051152328,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.706754060986036,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.996366,
|
||||
"other": 0.00,
|
||||
"logo": 2.3e-05,
|
||||
"formula": 4e-06
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": -0.10000600000000001,
|
||||
"x2": 595.099994,
|
||||
"y1": -0.07998660000000002,
|
||||
"y2": 842.0800134,
|
||||
"pageNumber": 8
|
||||
},
|
||||
"geometry": {
|
||||
"width": 595.2,
|
||||
"height": 842.16
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 1.0002630764355351,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.706754060986036,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.999772,
|
||||
"logo": 0.000131,
|
||||
"other": 9.7e-05,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 82.59443842482001,
|
||||
"x2": 512.6365568843402,
|
||||
"y1": 116.943736387567,
|
||||
"y2": 725.0718450317352,
|
||||
"pageNumber": 73
|
||||
},
|
||||
"geometry": {
|
||||
"width": 430.04211845952017,
|
||||
"height": 608.1281086441682
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.72236755521117,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.7071571143427432,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 328.20483600000006,
|
||||
"x2": 393.94460940000005,
|
||||
"y1": 175.1643178,
|
||||
"y2": 203.92865619999998,
|
||||
"pageNumber": 81
|
||||
},
|
||||
"geometry": {
|
||||
"width": 65.73977339999999,
|
||||
"height": 28.764338399999986
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.06142518774572455,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 2.2854609929078022,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 136.5955818,
|
||||
"x2": 224.72461859999999,
|
||||
"y1": 175.1133172,
|
||||
"y2": 203.97965680000001,
|
||||
"pageNumber": 81
|
||||
},
|
||||
"geometry": {
|
||||
"width": 88.1290368,
|
||||
"height": 28.866339600000003
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.07124601312700823,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 3.053003533568904,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"logo": 1.0,
|
||||
"signature": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "logo"
|
||||
},
|
||||
"position": {
|
||||
"x1": 194.99126880000003,
|
||||
"x2": 399.80967840000005,
|
||||
"y1": 554.6597824,
|
||||
"y2": 686.2413304,
|
||||
"pageNumber": 81
|
||||
},
|
||||
"geometry": {
|
||||
"width": 204.81840960000002,
|
||||
"height": 131.581548
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.23189275858788796,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.5565891472868219,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 133.9945512,
|
||||
"x2": 242.52382799999998,
|
||||
"y1": 411.24609519999996,
|
||||
"y2": 523.2434128,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 108.52927679999999,
|
||||
"height": 111.99731760000003
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.15573364968831904,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9690346083788703,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 133.5865464,
|
||||
"x2": 242.3198256,
|
||||
"y1": 274.972492,
|
||||
"y2": 387.7858192,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 108.7332792,
|
||||
"height": 112.8133272
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.15644678522591335,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9638336347197106,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 246.19587120000003,
|
||||
"x2": 356.5611696,
|
||||
"y1": 400.84197279999995,
|
||||
"y2": 519.3673672,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 110.3652984,
|
||||
"height": 118.52539440000004
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.1615575178049721,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9311531841652321,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 244.9718568,
|
||||
"x2": 358.3971912,
|
||||
"y1": 274.972492,
|
||||
"y2": 387.7858192,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 113.4253344,
|
||||
"height": 112.8133272
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.15978662903260646,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.0054249547920433,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 254.9679744,
|
||||
"x2": 371.6573472,
|
||||
"y1": 439.6024288,
|
||||
"y2": 564.0438928,
|
||||
"pageNumber": 91
|
||||
},
|
||||
"geometry": {
|
||||
"width": 116.6893728,
|
||||
"height": 124.441464
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.17021718544102565,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9377049180327869,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 133.9945512,
|
||||
"x2": 249.663912,
|
||||
"y1": 443.07046959999997,
|
||||
"y2": 687.2613424,
|
||||
"pageNumber": 91
|
||||
},
|
||||
"geometry": {
|
||||
"width": 115.66936080000002,
|
||||
"height": 244.19087280000002
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.23739910530627284,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.4736842105263158,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 105.84222,
|
||||
"x2": 374.870385,
|
||||
"y1": 526.40545,
|
||||
"y2": 687.05734,
|
||||
"pageNumber": 92
|
||||
},
|
||||
"geometry": {
|
||||
"width": 269.028165,
|
||||
"height": 160.65188999999998
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.2936614851112628,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.6746031746031749,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"logo": 0.788068,
|
||||
"other": 0.152259,
|
||||
"formula": 0.036883,
|
||||
"signature": 0.02279
|
||||
},
|
||||
"label": "logo"
|
||||
},
|
||||
"position": {
|
||||
"x1": 44.64999049990001,
|
||||
"x2": 550.5759424999001,
|
||||
"y1": 63.286004150029996,
|
||||
"y2": 778.72242095003,
|
||||
"pageNumber": 94
|
||||
},
|
||||
"geometry": {
|
||||
"width": 505.92595200000005,
|
||||
"height": 715.4364168000001
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.8498341845521462,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.7071571143427431,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.998335,
|
||||
"logo": 0.000955,
|
||||
"other": 0.000703,
|
||||
"formula": 7e-06
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 58.954005540029996,
|
||||
"x2": 536.45979618003,
|
||||
"y1": 83.94401504006001,
|
||||
"y2": 758.05854296006,
|
||||
"pageNumber": 95
|
||||
},
|
||||
"geometry": {
|
||||
"width": 477.50579064000004,
|
||||
"height": 674.11452792
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.8014221863697041,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.7083452007974936,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user