diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java index 6a278ded..2d5f5acb 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java @@ -43,4 +43,6 @@ public class RedactionLogEntry { private int startOffset; private int endOffset; + private boolean isImage; + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java index 89c81de5..af07f19f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java @@ -1,8 +1,8 @@ package com.iqser.red.service.redaction.v1.server.classification.model; -import java.awt.geom.Rectangle2D; import java.util.List; +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle; @@ -17,7 +17,7 @@ public class Page { @NonNull private List textBlocks; - private List imageBounds; + private List images; private Rectangle bodyTextFrame; @@ -31,7 +31,9 @@ public class Page { private StringFrequencyCounter fontCounter = new StringFrequencyCounter(); private StringFrequencyCounter 
fontStyleCounter = new StringFrequencyCounter(); + public boolean isRotated() { + return rotation != 0; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationClient.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationClient.java new file mode 100644 index 00000000..4517dd99 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationClient.java @@ -0,0 +1,15 @@ +package com.iqser.red.service.redaction.v1.server.client; + +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.multipart.MultipartFile; + +@FeignClient(name = "ImageClassificationResource", url = "${image-service.url}") +public interface ImageClassificationClient { + + @PostMapping(value = "/process_full_img", consumes = MediaType.MULTIPART_FORM_DATA_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + ImageClassificationResponse classify(@RequestBody MultipartFile file); + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationResponse.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationResponse.java new file mode 100644 index 00000000..81ae0643 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/ImageClassificationResponse.java @@ -0,0 +1,13 @@ +package com.iqser.red.service.redaction.v1.server.client; + +import lombok.AllArgsConstructor; +import lombok.Data; 
+import lombok.NoArgsConstructor;
+
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class ImageClassificationResponse {
+
+    private String category;
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/MockMultipartFile.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/MockMultipartFile.java
new file mode 100644
index 00000000..5dc671e4
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/MockMultipartFile.java
@@ -0,0 +1,137 @@
+package com.iqser.red.service.redaction.v1.server.client;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.springframework.lang.NonNull;
+import org.springframework.lang.Nullable;
+import org.springframework.util.Assert;
+import org.springframework.util.FileCopyUtils;
+import org.springframework.web.multipart.MultipartFile;
+
+/**
+ * {@link MultipartFile} implementation that keeps its whole content in a byte array.
+ */
+public class MockMultipartFile implements MultipartFile {
+
+    private final String name;
+    private final String originalFilename;
+    @Nullable
+    private final String contentType;
+    private final byte[] content;
+
+
+    /**
+     * Creates a file with an empty original filename and no content type.
+     */
+    public MockMultipartFile(String name, @Nullable byte[] content) {
+
+        this(name, "", null, content);
+    }
+
+
+    /**
+     * Creates a file with an empty original filename and no content type from the bytes of a stream.
+     *
+     * @throws IOException if copying the stream into memory fails
+     */
+    public MockMultipartFile(String name, InputStream contentStream) throws IOException {
+
+        this(name, "", null, FileCopyUtils.copyToByteArray(contentStream));
+    }
+
+
+    /**
+     * Creates a file from in-memory content.
+     *
+     * @param name             name reported by {@link #getName()}, must not be empty
+     * @param originalFilename original filename, {@code null} is stored as an empty string
+     * @param contentType      content type, may be {@code null}
+     * @param content          file content, {@code null} is stored as an empty array
+     */
+    public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
+                             @Nullable byte[] content) {
+
+        Assert.hasLength(name, "Name must not be empty");
+        this.name = name;
+        this.originalFilename = originalFilename != null ? originalFilename : "";
+        this.contentType = contentType;
+        this.content = content != null ? content : new byte[0];
+    }
+
+
+    /**
+     * Creates a file from the bytes of a stream.
+     *
+     * @throws IOException if copying the stream into memory fails
+     */
+    public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
+                             InputStream contentStream) throws IOException {
+
+        this(name, originalFilename, contentType, FileCopyUtils.copyToByteArray(contentStream));
+    }
+
+
+    @Override
+    public String getName() {
+
+        return this.name;
+    }
+
+
+    @Override
+    @NonNull
+    public String getOriginalFilename() {
+
+        return this.originalFilename;
+    }
+
+
+    @Override
+    @Nullable
+    public String getContentType() {
+
+        return this.contentType;
+    }
+
+
+    @Override
+    public boolean isEmpty() {
+
+        return this.content.length == 0;
+    }
+
+
+    @Override
+    public long getSize() {
+
+        return this.content.length;
+    }
+
+
+    /**
+     * Returns the backing array itself, not a copy.
+     */
+    @Override
+    public byte[] getBytes() throws IOException {
+
+        return this.content;
+    }
+
+
+    @Override
+    public InputStream getInputStream() throws IOException {
+
+        return new ByteArrayInputStream(this.content);
+    }
+
+
+    @Override
+    public void transferTo(File dest) throws IOException, IllegalStateException {
+
+        FileCopyUtils.copy(this.content, dest);
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
index a9110337..9563dadf 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
@@ -20,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationSer
 import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
 import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
 import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
+import 
com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService; import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService; import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; @@ -53,6 +54,7 @@ public class RedactionController implements RedactionResource { private final DictionaryService dictionaryService; private final AnnotationService annotationService; private final ReanalyzeService reanalyzeService; + private final ImageClassificationService imageClassificationService; @Override @@ -66,6 +68,7 @@ public class RedactionController implements RedactionResource { log.info("Document structure analysis successful, starting redaction analysis..."); entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions()); + imageClassificationService.classifyImages(classifiedDoc); redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest .getRuleSetId()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java index 5a4265bd..9e2c3be0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java @@ -41,6 +41,7 @@ import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.util.Matrix; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import 
com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; import lombok.Getter; @@ -58,7 +59,7 @@ public class PDFLinesTextStripper extends PDFTextStripper { @Getter private int maxCharWidth; - + @Getter private int minCharHeight; @@ -74,7 +75,7 @@ public class PDFLinesTextStripper extends PDFTextStripper { private final List graphicsPath = new ArrayList<>(); @Getter - private List imageBounds = new ArrayList<>(); + private List images = new ArrayList<>(); private float path_x; private float path_y; @@ -222,7 +223,7 @@ public class PDFLinesTextStripper extends PDFTextStripper { .getWidth(), (float) imageBounds.getHeight()); if (rect.getHeight() > 2 && rect.getWidth() > 2) { - this.imageBounds.add(rect); + this.images.add(new PdfImage(pdfImage.getImage(), rect)); } } } catch (Exception e) { @@ -358,7 +359,7 @@ public class PDFLinesTextStripper extends PDFTextStripper { minCharHeight = Integer.MAX_VALUE; maxCharHeight = 0; textPositionSequences.clear(); - imageBounds = new ArrayList<>(); + images = new ArrayList<>(); rulings.clear(); graphicsPath.clear(); path_x = 0.0f; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/ParsedElements.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/ParsedElements.java deleted file mode 100644 index 479ac2d5..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/ParsedElements.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.parsing.model; - -import java.awt.geom.Rectangle2D; -import java.util.List; - -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; - -import lombok.Builder; -import lombok.Data; - -@Data -@Builder -public class ParsedElements { 
- - private List sequences; - private List rulings; - private List imageBounds; - - private boolean landscape; - private boolean rotated; - - private float minCharWidth; - private float maxCharWidth; -} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ImageType.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ImageType.java new file mode 100644 index 00000000..a2a1843f --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ImageType.java @@ -0,0 +1,5 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +public enum ImageType { + LOGO, FORMULA, SIGNATURE, OTHER, OCR +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java new file mode 100644 index 00000000..856c54f0 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java @@ -0,0 +1,24 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; + +@Data +@NoArgsConstructor +@AllArgsConstructor +@RequiredArgsConstructor +public class PdfImage { + + @NonNull + private BufferedImage image; + @NonNull + private Rectangle2D position; + private ImageType imageType; + +} \ No newline at end of file diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java
new file mode 100644
index 00000000..6d28bfb2
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java
@@ -0,0 +1,110 @@
+package com.iqser.red.service.redaction.v1.server.redaction.service;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Locale;
+
+import javax.imageio.ImageIO;
+
+import org.springframework.stereotype.Service;
+
+import com.iqser.red.service.redaction.v1.server.classification.model.Document;
+import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
+import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
+import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile;
+import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
+import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
+import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Assigns an {@link ImageType} to every image found on the pages of a document.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class ImageClassificationService {
+
+    private final ImageClassificationClient imageClassificationClient;
+    private final RedactionServiceSettings settings;
+
+
+    /**
+     * Sets the image type of every image on every page of the document.
+     * <p>
+     * The type comes from the classification client when classification is enabled in the settings and is
+     * {@link ImageType#OTHER} otherwise. An image that ends up as {@code OTHER} and whose position contains
+     * at least one text block of its page is re-labelled {@link ImageType#OCR}.
+     * <p>
+     * Failed or unusable classifications fall back to {@code OTHER}, so the type is never left {@code null}.
+     *
+     * @param classifiedDoc document whose page images are updated in place
+     */
+    public void classifyImages(Document classifiedDoc) {
+
+        long start = System.currentTimeMillis();
+        classifiedDoc.getPages().forEach(page -> page.getImages().forEach(image -> {
+
+            image.setImageType(settings.isEnableImageClassification() ? classify(image) : ImageType.OTHER);
+
+            if (image.getImageType() == ImageType.OTHER) {
+                boolean containsText = page.getTextBlocks()
+                        .stream()
+                        .anyMatch(textBlock -> image.getPosition()
+                                .contains(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight()));
+                if (containsText) {
+                    image.setImageType(ImageType.OCR);
+                }
+            }
+        }));
+        log.info("Image classification took: {}", System.currentTimeMillis() - start);
+    }
+
+
+    /**
+     * Sends the image as PNG to the classification client and maps the answer to an {@link ImageType}.
+     *
+     * @param image image to classify
+     * @return the classified type, or {@link ImageType#OTHER} when the image cannot be encoded or written
+     */
+    private ImageType classify(PdfImage image) {
+
+        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+            // ImageIO.write reports a missing writer through its return value instead of an exception.
+            if (!ImageIO.write(image.getImage(), "png", baos)) {
+                log.error("Could not classify image: no PNG writer available");
+                return ImageType.OTHER;
+            }
+            byte[] png = baos.toByteArray();
+            return toImageType(imageClassificationClient.classify(new MockMultipartFile("file", "Image.png", "image/png", png)));
+        } catch (IOException e) {
+            log.error("Could not classify image", e);
+            return ImageType.OTHER;
+        }
+    }
+
+
+    /**
+     * Maps a classification response to an {@link ImageType}, ignoring case and surrounding whitespace.
+     *
+     * @param response answer of the classification client, may be {@code null}
+     * @return the matching type, or {@link ImageType#OTHER} for a missing response, a missing category or a
+     *         category without a matching enum constant
+     */
+    private ImageType toImageType(ImageClassificationResponse response) {
+
+        if (response == null || response.getCategory() == null) {
+            log.warn("Image classification returned no category");
+            return ImageType.OTHER;
+        }
+        try {
+            return ImageType.valueOf(response.getCategory().trim().toUpperCase(Locale.ROOT));
+        } catch (IllegalArgumentException e) {
+            log.warn("Unknown image category '{}'", response.getCategory());
+            return ImageType.OTHER;
+        }
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java
index 88da2588..8bd55e1c 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java
@@ -248,7 +248,7 @@ public class ReanalyzeService {
         Iterator itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
         while (itty.hasNext()) {
             RedactionLogEntry entry = itty.next();
-            if (sectionsToReanaylse.contains(entry.getSectionNumber()) && !entry.getType().equals("image") || entry.getSectionNumber() == 0 && !entry.getType().equals("image")) {
+            if (sectionsToReanaylse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage()) {
                 itty.remove();
             }
         }
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 6b5278ae..32fe5dc7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -1,9 +1,9 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -30,6 +30,8 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType; +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; @@ -41,8 +43,6 @@ import lombok.RequiredArgsConstructor; @RequiredArgsConstructor public class RedactionLogCreatorService { - private static final String IMAGE = "image"; - private final DictionaryService dictionaryService; @@ -56,14 +56,16 @@ public class RedactionLogCreatorService { addSectionGrid(classifiedDoc, page); if (classifiedDoc.getEntities().get(page) != null) { - 
classifiedDoc.getRedactionLogEntities().addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId)); + classifiedDoc.getRedactionLogEntities() + .addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId)); } if (manualRedactionPages.contains(page)) { - classifiedDoc.getRedactionLogEntities().addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId)); + classifiedDoc.getRedactionLogEntities() + .addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId)); } - if (!classifiedDoc.getPages().get(page - 1).getImageBounds().isEmpty()) { + if (!classifiedDoc.getPages().get(page - 1).getImages().isEmpty()) { addImageEntries(classifiedDoc, page, ruleSetId); } } @@ -72,24 +74,41 @@ public class RedactionLogCreatorService { private void addImageEntries(Document classifiedDoc, int pageNumber, String ruleSetId) { - for (Rectangle2D imageBounds : classifiedDoc.getPages().get(pageNumber - 1).getImageBounds()) { + for (PdfImage image : classifiedDoc.getPages().get(pageNumber - 1).getImages()) { RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder() - .id(IdBuilder.buildId(imageBounds, pageNumber)) - .color(getColor(IMAGE, ruleSetId)) - .type(IMAGE) - .redacted(false) - .isHint(true) + .id(IdBuilder.buildId(image.getPosition(), pageNumber)) + .color(getColor(image.getImageType().name().toLowerCase(Locale.ROOT), ruleSetId)) + .isImage(true) + .type(image.getImageType().equals(ImageType.OTHER) ? 
"image" : image.getImageType().name().toLowerCase(Locale.ROOT)) + .redacted(isImageRedactionType(image.getImageType())) + .isHint(!isImageRedactionType(image.getImageType())) .manual(false) .isDictionaryEntry(false) .isRecommendation(false) - .positions(List.of(new Rectangle(new Point((float) imageBounds.getX(), (float) imageBounds.getY()), (float) imageBounds - .getWidth(), (float) imageBounds.getHeight(), pageNumber))) + .positions(List.of(new Rectangle(new Point((float) image.getPosition() + .getX(), (float) image.getPosition().getY()), (float) image.getPosition() + .getWidth(), (float) image.getPosition().getHeight(), pageNumber))) .build(); classifiedDoc.getRedactionLogEntities().add(redactionLogEntry); } } + private boolean isImageRedactionType(ImageType imageType) { + + if (imageType.equals(ImageType.LOGO)) { + return true; + } + if (imageType.equals(ImageType.FORMULA)) { + return true; + } + if (imageType.equals(ImageType.SIGNATURE)) { + return true; + } + return false; + } + + private Set getManualRedactionPages(ManualRedactions manualRedactions) { Set manualRedactionPages = new HashSet<>(); @@ -107,7 +126,8 @@ public class RedactionLogCreatorService { } - public List addEntries(Map> entities, ManualRedactions manualRedactions, int page, String ruleSetId) { + public List addEntries(Map> entities, ManualRedactions manualRedactions, + int page, String ruleSetId) { List redactionLogEntities = new ArrayList<>(); @@ -238,8 +258,9 @@ public class RedactionLogCreatorService { } - public List addManualAddEntries(Set manualAdds, Map> comments, int page, - String ruleSetId) { + public List addManualAddEntries(Set manualAdds, + Map> comments, int page, + String ruleSetId) { List redactionLogEntities = new ArrayList<>(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index 0e6f43f3..27c3bd52 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -15,7 +15,6 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService; import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService; import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper; -import com.iqser.red.service.redaction.v1.server.parsing.model.ParsedElements; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; @@ -57,19 +56,10 @@ public class PdfSegmentationService { int rotation = pdPage.getRotation(); boolean isRotated = rotation != 0 && rotation != 360; - ParsedElements parsedElements = ParsedElements.builder() - .rulings(stripper.getRulings()) - .sequences(stripper.getTextPositionSequences()) - .imageBounds(stripper.getImageBounds()) - .minCharWidth(stripper.getMinCharWidth()) - .maxCharWidth(stripper.getMaxCharWidth()) - .landscape(isLandscape) - .rotated(isRotated) - .build(); + CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(stripper.getRulings(), stripper.getMinCharWidth(), stripper + .getMaxCharHeight()); - CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(parsedElements.getRulings(), stripper.getMinCharWidth(), stripper.getMaxCharHeight()); - - Page page 
= blockificationService.blockify(parsedElements.getSequences(), cleanRulings.getHorizontal(), cleanRulings + Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings .getVertical()); page.setRotation(rotation); @@ -77,11 +67,11 @@ public class PdfSegmentationService { buildPageStatistics(page); - page.setLandscape(parsedElements.isLandscape() || parsedElements.isRotated()); + page.setLandscape(isLandscape || isRotated); page.setPageNumber(pageNumber); increaseDocumentStatistics(page, document); - page.setImageBounds(parsedElements.getImageBounds()); + page.setImages(stripper.getImages()); pages.add(page); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java index df95e899..eb57a0f6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java @@ -12,4 +12,6 @@ public class RedactionServiceSettings { private int surroundingWordsOffsetWindow = 100; + private boolean enableImageClassification = true; + } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml index c89a6664..efb01d6f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml @@ -2,6 +2,7 @@ info: description: Redaction Service Server V1 configuration-service.url: 
"http://configuration-service-v1:8080" +image-service.url: "http://image-service-v1:8080" server: port: 8080 diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 84a0e1b8..f48cd396 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -83,6 +83,7 @@ import com.iqser.red.service.redaction.v1.model.Status; import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient; import com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.service.redaction.v1.server.controller.RedactionController; import com.iqser.red.service.redaction.v1.server.exception.RedactionException; @@ -131,6 +132,9 @@ public class RedactionIntegrationTest { @MockBean private DictionaryClient dictionaryClient; + @MockBean + private ImageClassificationClient imageClassificationClient; + private final Map> dictionary = new HashMap<>(); private final Map typeColorMap = new HashMap<>(); private final Map hintTypeMap = new HashMap<>(); @@ -412,7 +416,8 @@ public class RedactionIntegrationTest { } - private List toDictionaryEntry(List entries){ + private List toDictionaryEntry(List entries) { + List dictionaryEntries = new ArrayList<>(); entries.forEach(entry -> { dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? 
reanlysisVersions.get(entry) : 0L, false)); @@ -450,7 +455,6 @@ public class RedactionIntegrationTest { assertThat(entry.getValue().size()).isEqualTo(1); }); - dictionary.get(AUTHOR).add("Drinking water"); when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L); @@ -498,7 +502,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_13_Volume_3CP_A9396G_B-1_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf"); AnalyzeRequest request = AnalyzeRequest.builder() .ruleSetId(TEST_RULESET_ID) @@ -507,6 +511,12 @@ public class RedactionIntegrationTest { AnalyzeResult result = redactionController.analyze(request); + result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { + if (entry.isImage()) { + System.out.println("---->" + entry.getType()); + } + }); + long end = System.currentTimeMillis(); System.out.println("first analysis duration: " + (end - start)); @@ -519,7 +529,7 @@ public class RedactionIntegrationTest { loop: for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) { for (SectionText sectionText : result.getText().getSectionTexts()) { - if (redactionLogEntry.getType().equals("image")) { + if (redactionLogEntry.isImage()) { correctFound++; continue loop; } @@ -536,7 +546,6 @@ public class RedactionIntegrationTest { } assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size()); - dictionary.get(AUTHOR).add("properties"); reanlysisVersions.put("properties", 1L); @@ -575,127 +584,6 @@ public class RedactionIntegrationTest { } - - @Test - @Ignore - public void fillRecanTest() throws IOException { - - System.out.println("redactionTest"); - long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new 
ClassPathResource("files/S5.pdf"); - - AnalyzeRequest request = AnalyzeRequest.builder() - .ruleSetId(TEST_RULESET_ID) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .build(); - - AnalyzeResult result = redactionController.analyze(request); - - AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .redactionLog(result.getRedactionLog()) - .sectionGrid(result.getSectionGrid()) - .build()); - - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { - fileOutputStream.write(annotateResponse.getDocument()); - } - - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) { - fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText())); - } - - int correctFound = 0; - loop: - for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) { - for (SectionText sectionText : result.getText().getSectionTexts()) { - if (redactionLogEntry.getType().equals("image")) { - correctFound++; - continue loop; - } - if (redactionLogEntry.getSectionNumber() == sectionText.getSectionNumber()) { - String value = sectionText.getText() - .substring(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset()); - if (redactionLogEntry.getValue().equalsIgnoreCase(value)) { - correctFound++; - } else { - throw new RuntimeException("WTF"); - } - } - } - } - assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size()); - - System.out.println("correctFound " + correctFound); - - long end = System.currentTimeMillis(); - - System.out.println("duration: " + (end - start)); - System.out.println("numberOfPages: " + result.getNumberOfPages()); - - SectionArea sectionArea = result.getText().getSectionTexts().get(3).getSectionAreas().get(5); - - try (PDDocument pdDocument = PDDocument.load(new 
ByteArrayInputStream(IOUtils.toByteArray(pdfFileResource.getInputStream())))) { - - PDPage docPage = pdDocument.getPage(0); - - PDFTextStripperByArea textStripper = new PDFTextStripperByArea(); - - PDRectangle cropBox = docPage.getCropBox(); - PDRectangle mediaBox = docPage.getMediaBox(); - - -// if (textPositions.get(0).getRotation() == 90) { -// posXEnd = textPositions.get(0).getYDirAdj() + 2; -// posYInit = getY1(); -// posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; -// } else { -// posXEnd = textPositions.get(textPositions.size() - 1) -// .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1; -// posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; -// posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1) -// .getYDirAdj() + 2; -// } - - - Rectangle2D rect = new Rectangle2D.Float(sectionArea.getTopLeft() - .getY(), sectionArea.getTopLeft() - .getX() , sectionArea.getHeight(), sectionArea - .getWidth() + 0.001f); - - textStripper.addRegion("region", rect); - - - - textStripper.extractRegions(docPage); - - String textForRegion = textStripper.getTextForRegion("region"); - - System.out.println(textForRegion); - - // fill a rectangle - PDPageContentStream contents = new PDPageContentStream (pdDocument, docPage, PDPageContentStream.AppendMode.APPEND, false, false); - contents.setNonStrokingColor (Color.RED); - contents.addRect (sectionArea.getTopLeft().getX(), sectionArea.getTopLeft().getY(), sectionArea.getWidth(), sectionArea.getHeight()); - contents.fill (); - contents.close (); - try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { - pdDocument.save(byteArrayOutputStream); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated2.pdf")) { - fileOutputStream.write(byteArrayOutputStream.toByteArray()); - } - } - - } catch (Exception e) { - throw new RedactionException(e); - } - - } - - 
- - - @Test public void testTableRedaction() throws IOException { @@ -782,7 +670,6 @@ public class RedactionIntegrationTest { .status(Status.APPROVED) .build())); - ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder() .redactionLog(result.getRedactionLog()) .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -791,7 +678,6 @@ public class RedactionIntegrationTest { .ruleSetId(TEST_RULESET_ID) .build()); - AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) .redactionLog(reanalyzeResult.getRedactionLog()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml index fcef84ab..23e59464 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml @@ -1,4 +1,5 @@ configuration-service.url: "http://configuration-service-v1:8080" +image-service.url: "http://image-service-v1:8080" ribbon: ConnectTimeout: 600000 @@ -12,3 +13,6 @@ processing.kafkastreams: false platform.multi-tenancy: enabled: false + +redaction-service: + enable-image-classification: false