From 9c2926451d77f9568bb1b712134773169134cfed Mon Sep 17 00:00:00 2001 From: deiflaender Date: Thu, 19 Nov 2020 11:36:50 +0100 Subject: [PATCH] RED-744: Expose section grid --- .../redaction/v1/model/CellRectangle.java | 16 +++++++++ .../redaction/v1/model/RedactionResult.java | 1 + .../redaction/v1/model/SectionGrid.java | 18 ++++++++++ .../redaction/v1/model/SectionRectangle.java | 33 +++++++++++++++++++ .../server/classification/model/Document.java | 2 ++ .../classification/model/TextBlock.java | 6 ---- .../controller/RedactionController.java | 28 ++++++++++++---- .../model/AbstractTextContainer.java | 8 +++++ .../service/AnnotationHighlightService.java | 30 ++++++++++++++--- 9 files changed, 126 insertions(+), 16 deletions(-) create mode 100644 redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/CellRectangle.java create mode 100644 redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java create mode 100644 redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/CellRectangle.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/CellRectangle.java new file mode 100644 index 00000000..a4cefd8d --- /dev/null +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/CellRectangle.java @@ -0,0 +1,16 @@ +package com.iqser.red.service.redaction.v1.model; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class CellRectangle { + + private Point topLeft; + private float width; + private float height; + +} diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java index 8199ec19..398f9fa5 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java @@ -14,5 +14,6 @@ public class RedactionResult { private byte[] document; private int numberOfPages; private RedactionLog redactionLog; + private SectionGrid sectionGrid; } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java new file mode 100644 index 00000000..ea5acb95 --- /dev/null +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java @@ -0,0 +1,18 @@ +package com.iqser.red.service.redaction.v1.model; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class SectionGrid { + + private Map> rectanglesPerPage = new HashMap<>(); + +} diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java new file mode 100644 index 00000000..b96e3572 --- /dev/null +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java @@ -0,0 +1,33 @@ +package com.iqser.red.service.redaction.v1.model; + +import java.util.List; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +@RequiredArgsConstructor +public class SectionRectangle { + + @NonNull + private Point topLeft; + + @NonNull + private float width; + + @NonNull + private float height; + + @NonNull + private int part; + + @NonNull + private int numberOfParts; + + private List tableCells; +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java index e39964ca..1749503d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Map; import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.model.SectionGrid; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import lombok.Data; @@ -25,4 +26,5 @@ public class Document { private boolean headlines; private List redactionLogEntities = new ArrayList<>(); + private SectionGrid sectionGrid = new SectionGrid(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java index 85af6350..0b3b253c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java @@ -97,13 +97,7 @@ public class TextBlock extends AbstractTextContainer { this.maxY = Math.max(y1, y2); } - public float getHeight() { - return maxY - minY; - } - public float getWidth() { - return maxX - minX; - } @Override public String toString() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index a86fd12f..532b0d7d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.model.RedactionLog; import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; import com.iqser.red.service.redaction.v1.model.RedactionRequest; import com.iqser.red.service.redaction.v1.model.RedactionResult; +import com.iqser.red.service.redaction.v1.model.SectionGrid; import com.iqser.red.service.redaction.v1.resources.RedactionResource; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; @@ -41,6 +42,7 @@ public class RedactionController implements RedactionResource { private final DroolsExecutionService droolsExecutionService; private final DictionaryService dictionaryService; + @Override public RedactionResult redact(@RequestBody RedactionRequest redactionRequest) { @@ -49,14 +51,17 @@ public class RedactionController implements RedactionResource { Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); entityRedactionService.processDocument(classifiedDoc, redactionRequest.getManualRedactions()); - annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest.getManualRedactions()); + annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest + .getManualRedactions()); if (redactionRequest.isFlatRedaction()) { PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument); - return convert(flatDocument, classifiedDoc.getPages().size(), classifiedDoc.getRedactionLogEntities()); + return convert(flatDocument, classifiedDoc.getPages() + .size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid()); } - return convert(pdDocument, classifiedDoc.getPages().size(), classifiedDoc.getRedactionLogEntities()); + return convert(pdDocument, classifiedDoc.getPages() + .size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid()); } catch (IOException e) { throw new RedactionException(e); @@ -64,6 +69,7 @@ public class RedactionController implements RedactionResource { } + @Override public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) { @@ -81,6 +87,7 @@ public class RedactionController implements RedactionResource { } + @Override public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) { @@ -98,6 +105,7 @@ public class RedactionController implements RedactionResource { } + @Override public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) { @@ -124,24 +132,32 @@ public class RedactionController implements RedactionResource { } + @Override public void updateRules(@RequestBody String rules) { + droolsExecutionService.updateRules(rules); } private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException { - return convert(document, numberOfPages, null); + + return convert(document, numberOfPages, null, null); } - private RedactionResult convert(PDDocument document, int numberOfPages, List redactionLogEntities) throws IOException { + + private RedactionResult convert(PDDocument document, int numberOfPages, + List redactionLogEntities, + SectionGrid sectionGrid) throws IOException { try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { document.save(byteArrayOutputStream); return RedactionResult.builder() .document(byteArrayOutputStream.toByteArray()) .numberOfPages(numberOfPages) - .redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService.getRulesVersion())) + .redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService + .getRulesVersion())) + .sectionGrid(sectionGrid) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java index b4e36e07..faa2b690 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java @@ -28,4 +28,12 @@ public abstract class AbstractTextContainer { return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft().getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight(); } + public float getHeight() { + return maxY - minY; + } + + public float getWidth() { + return maxX - minX; + } + } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java index 28f4f275..7eac420c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java @@ -23,13 +23,16 @@ import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; import org.apache.pdfbox.text.TextPosition; import org.springframework.stereotype.Service; +import com.iqser.red.service.redaction.v1.model.CellRectangle; import com.iqser.red.service.redaction.v1.model.Comment; import com.iqser.red.service.redaction.v1.model.IdRemoval; import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; import com.iqser.red.service.redaction.v1.model.ManualRedactionType; import com.iqser.red.service.redaction.v1.model.ManualRedactions; +import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Rectangle; import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.model.SectionRectangle; import com.iqser.red.service.redaction.v1.model.Status; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; @@ -105,7 +108,6 @@ public class AnnotationHighlightService { continue; } - boolean requestedToRemove = false; List comments = null; @@ -170,7 +172,6 @@ public class AnnotationHighlightService { } } - } } @@ -452,11 +453,27 @@ public class AnnotationHighlightService { continue; } if (textBlock instanceof TextBlock) { + textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size()); visualizeTextBlock((TextBlock) textBlock, contentStream); + + classifiedDoc.getSectionGrid() + .getRectanglesPerPage() + .computeIfAbsent(page, (x) -> new ArrayList<>()) + .add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock + .getHeight(), i + 1, paragraph.getPageBlocks().size())); + } else if (textBlock instanceof Table) { + textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size()); - visualizeTable((Table) textBlock, contentStream); + List cellRectangles = visualizeTable((Table) textBlock, contentStream); + + classifiedDoc.getSectionGrid() + .getRectanglesPerPage() + .computeIfAbsent(page, (x) -> new ArrayList<>()) + .add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock + .getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles)); + } } } @@ -482,8 +499,9 @@ public class AnnotationHighlightService { } - private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException { + private List visualizeTable(Table table, PDPageContentStream contentStream) throws IOException { + List cellRectangles = new ArrayList<>(); for (List row : table.getRows()) { for (Cell cell : row) { @@ -493,6 +511,8 @@ public class AnnotationHighlightService { contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell .getHeight()); contentStream.stroke(); + cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell + .getWidth(), (float) cell.getHeight())); } } } @@ -505,6 +525,8 @@ public class AnnotationHighlightService { contentStream.showText(table.getClassification()); contentStream.endText(); } + + return cellRectangles; } }