Pull request #72: RED-744: Expose section grid

Merge in RED/redaction-service from RED-744 to master

* commit '9c2926451d77f9568bb1b712134773169134cfed':
  RED-744: Expose section grid
This commit is contained in:
Dominique Eiflaender 2020-11-19 11:48:24 +01:00
commit 5481dfbcff
9 changed files with 126 additions and 16 deletions

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Rectangle describing one table cell: an anchor point plus a width and a height.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and an all-arguments constructor taking
 * {@code (topLeft, width, height)} in field declaration order.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class CellRectangle {

    /** Anchor point of the rectangle. */
    private Point topLeft;

    /** Horizontal extent of the rectangle. */
    private float width;

    /** Vertical extent of the rectangle. */
    private float height;

}

View File

@ -14,5 +14,6 @@ public class RedactionResult {
private byte[] document;
private int numberOfPages;
private RedactionLog redactionLog;
private SectionGrid sectionGrid;
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.model;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Container for the section rectangles of a document, grouped by an integer key
 * (the page, going by the field name).
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and a single-argument constructor taking the map.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class SectionGrid {

    /**
     * Section rectangles per page. Starts out as an empty, mutable {@link HashMap} when the
     * no-argument constructor is used; the all-arguments constructor stores whatever map
     * it is given.
     */
    private Map<Integer, List<SectionRectangle>> rectanglesPerPage = new HashMap<>();

}

View File

@ -0,0 +1,33 @@
package com.iqser.red.service.redaction.v1.model;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
/**
 * Rectangle describing one section block, optionally with the rectangles of its table cells.
 *
 * <p>Three constructors are available:
 * <ul>
 *   <li>no arguments (generated by Lombok),</li>
 *   <li>{@code (topLeft, width, height, part, numberOfParts)}, which leaves
 *       {@link #tableCells} as {@code null},</li>
 *   <li>all six fields in declaration order (generated by Lombok).</li>
 * </ul>
 *
 * <p>The five-argument constructor is written out explicitly. It used to be produced by
 * {@code @RequiredArgsConstructor} in combination with {@code @NonNull} on the primitive
 * fields, where a null check is meaningless and only served to pull those fields into the
 * generated signature.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class SectionRectangle {

    /** Anchor point of the rectangle; must not be {@code null} when set or passed to a constructor. */
    @NonNull
    private Point topLeft;

    /** Horizontal extent of the rectangle. */
    private float width;

    /** Vertical extent of the rectangle. */
    private float height;

    /** Position of this block among its siblings (presumably 1-based; confirm against the caller). */
    private int part;

    /** Total number of blocks this one is counted against. */
    private int numberOfParts;

    /** Rectangles of the table cells inside this section; {@code null} unless explicitly provided. */
    private List<CellRectangle> tableCells;


    /**
     * Creates a section rectangle without table cells.
     *
     * @param topLeft anchor point of the rectangle, must not be {@code null}
     * @param width horizontal extent
     * @param height vertical extent
     * @param part position of this block among its siblings
     * @param numberOfParts total number of sibling blocks
     * @throws NullPointerException if {@code topLeft} is {@code null} (check inserted by Lombok)
     */
    public SectionRectangle(@NonNull Point topLeft, float width, float height, int part, int numberOfParts) {
        this.topLeft = topLeft;
        this.width = width;
        this.height = height;
        this.part = part;
        this.numberOfParts = numberOfParts;
    }

}

View File

@ -6,6 +6,7 @@ import java.util.List;
import java.util.Map;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import lombok.Data;
@ -25,4 +26,5 @@ public class Document {
private boolean headlines;
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
private SectionGrid sectionGrid = new SectionGrid();
}

View File

@ -97,13 +97,7 @@ public class TextBlock extends AbstractTextContainer {
this.maxY = Math.max(y1, y2);
}
public float getHeight() {
return maxY - minY;
}
public float getWidth() {
return maxX - minX;
}
@Override
public String toString() {

View File

@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
@ -41,6 +42,7 @@ public class RedactionController implements RedactionResource {
private final DroolsExecutionService droolsExecutionService;
private final DictionaryService dictionaryService;
@Override
public RedactionResult redact(@RequestBody RedactionRequest redactionRequest) {
@ -49,14 +51,17 @@ public class RedactionController implements RedactionResource {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, redactionRequest.getManualRedactions());
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest.getManualRedactions());
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest
.getManualRedactions());
if (redactionRequest.isFlatRedaction()) {
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
return convert(flatDocument, classifiedDoc.getPages().size(), classifiedDoc.getRedactionLogEntities());
return convert(flatDocument, classifiedDoc.getPages()
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
}
return convert(pdDocument, classifiedDoc.getPages().size(), classifiedDoc.getRedactionLogEntities());
return convert(pdDocument, classifiedDoc.getPages()
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
} catch (IOException e) {
throw new RedactionException(e);
@ -64,6 +69,7 @@ public class RedactionController implements RedactionResource {
}
@Override
public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) {
@ -81,6 +87,7 @@ public class RedactionController implements RedactionResource {
}
@Override
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
@ -98,6 +105,7 @@ public class RedactionController implements RedactionResource {
}
@Override
public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) {
@ -124,24 +132,32 @@ public class RedactionController implements RedactionResource {
}
@Override
public void updateRules(@RequestBody String rules) {
droolsExecutionService.updateRules(rules);
}
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
return convert(document, numberOfPages, null);
return convert(document, numberOfPages, null, null);
}
private RedactionResult convert(PDDocument document, int numberOfPages, List<RedactionLogEntry> redactionLogEntities) throws IOException {
private RedactionResult convert(PDDocument document, int numberOfPages,
List<RedactionLogEntry> redactionLogEntities,
SectionGrid sectionGrid) throws IOException {
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
document.save(byteArrayOutputStream);
return RedactionResult.builder()
.document(byteArrayOutputStream.toByteArray())
.numberOfPages(numberOfPages)
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService.getRulesVersion()))
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService
.getRulesVersion()))
.sectionGrid(sectionGrid)
.build();
}

View File

@ -28,4 +28,12 @@ public abstract class AbstractTextContainer {
return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft().getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight();
}
/**
 * Returns the vertical extent of this container.
 *
 * @return {@code maxY - minY}
 */
public float getHeight() {
    return this.maxY - this.minY;
}
/**
 * Returns the horizontal extent of this container.
 *
 * @return {@code maxX - minX}
 */
public float getWidth() {
    return this.maxX - this.minX;
}
}

View File

@ -23,13 +23,16 @@ import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.TextPosition;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.CellRectangle;
import com.iqser.red.service.redaction.v1.model.Comment;
import com.iqser.red.service.redaction.v1.model.IdRemoval;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactionType;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
import com.iqser.red.service.redaction.v1.model.Status;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
@ -105,7 +108,6 @@ public class AnnotationHighlightService {
continue;
}
boolean requestedToRemove = false;
List<Comment> comments = null;
@ -170,7 +172,6 @@ public class AnnotationHighlightService {
}
}
}
}
@ -452,11 +453,27 @@ public class AnnotationHighlightService {
continue;
}
if (textBlock instanceof TextBlock) {
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
visualizeTextBlock((TextBlock) textBlock, contentStream);
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size()));
} else if (textBlock instanceof Table) {
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
visualizeTable((Table) textBlock, contentStream);
List<CellRectangle> cellRectangles = visualizeTable((Table) textBlock, contentStream);
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
}
}
}
@ -482,8 +499,9 @@ public class AnnotationHighlightService {
}
private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
private List<CellRectangle> visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
List<CellRectangle> cellRectangles = new ArrayList<>();
for (List<Cell> row : table.getRows()) {
for (Cell cell : row) {
@ -493,6 +511,8 @@ public class AnnotationHighlightService {
contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell
.getHeight());
contentStream.stroke();
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight()));
}
}
}
@ -505,6 +525,8 @@ public class AnnotationHighlightService {
contentStream.showText(table.getClassification());
contentStream.endText();
}
return cellRectangles;
}
}