Pull request #72: RED-744: Expose section grid
Merge in RED/redaction-service from RED-744 to master * commit '9c2926451d77f9568bb1b712134773169134cfed': RED-744: Expose section grid
This commit is contained in:
commit
5481dfbcff
@ -0,0 +1,16 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class CellRectangle {
|
||||
|
||||
private Point topLeft;
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -14,5 +14,6 @@ public class RedactionResult {
|
||||
private byte[] document;
|
||||
private int numberOfPages;
|
||||
private RedactionLog redactionLog;
|
||||
private SectionGrid sectionGrid;
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class SectionGrid {
|
||||
|
||||
private Map<Integer, List<SectionRectangle>> rectanglesPerPage = new HashMap<>();
|
||||
|
||||
}
|
||||
@ -0,0 +1,33 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@RequiredArgsConstructor
|
||||
public class SectionRectangle {
|
||||
|
||||
@NonNull
|
||||
private Point topLeft;
|
||||
|
||||
@NonNull
|
||||
private float width;
|
||||
|
||||
@NonNull
|
||||
private float height;
|
||||
|
||||
@NonNull
|
||||
private int part;
|
||||
|
||||
@NonNull
|
||||
private int numberOfParts;
|
||||
|
||||
private List<CellRectangle> tableCells;
|
||||
}
|
||||
@ -6,6 +6,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
|
||||
import lombok.Data;
|
||||
@ -25,4 +26,5 @@ public class Document {
|
||||
private boolean headlines;
|
||||
|
||||
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
private SectionGrid sectionGrid = new SectionGrid();
|
||||
}
|
||||
|
||||
@ -97,13 +97,7 @@ public class TextBlock extends AbstractTextContainer {
|
||||
this.maxY = Math.max(y1, y2);
|
||||
}
|
||||
|
||||
public float getHeight() {
|
||||
return maxY - minY;
|
||||
}
|
||||
|
||||
public float getWidth() {
|
||||
return maxX - minX;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
@ -41,6 +42,7 @@ public class RedactionController implements RedactionResource {
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final DictionaryService dictionaryService;
|
||||
|
||||
|
||||
@Override
|
||||
public RedactionResult redact(@RequestBody RedactionRequest redactionRequest) {
|
||||
|
||||
@ -49,14 +51,17 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, redactionRequest.getManualRedactions());
|
||||
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest.getManualRedactions());
|
||||
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest
|
||||
.getManualRedactions());
|
||||
|
||||
if (redactionRequest.isFlatRedaction()) {
|
||||
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
|
||||
return convert(flatDocument, classifiedDoc.getPages().size(), classifiedDoc.getRedactionLogEntities());
|
||||
return convert(flatDocument, classifiedDoc.getPages()
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
|
||||
}
|
||||
|
||||
return convert(pdDocument, classifiedDoc.getPages().size(), classifiedDoc.getRedactionLogEntities());
|
||||
return convert(pdDocument, classifiedDoc.getPages()
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RedactionException(e);
|
||||
@ -64,6 +69,7 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) {
|
||||
|
||||
@ -81,6 +87,7 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
|
||||
|
||||
@ -98,6 +105,7 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) {
|
||||
|
||||
@ -124,24 +132,32 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void updateRules(@RequestBody String rules) {
|
||||
|
||||
droolsExecutionService.updateRules(rules);
|
||||
}
|
||||
|
||||
|
||||
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
|
||||
return convert(document, numberOfPages, null);
|
||||
|
||||
return convert(document, numberOfPages, null, null);
|
||||
}
|
||||
|
||||
private RedactionResult convert(PDDocument document, int numberOfPages, List<RedactionLogEntry> redactionLogEntities) throws IOException {
|
||||
|
||||
private RedactionResult convert(PDDocument document, int numberOfPages,
|
||||
List<RedactionLogEntry> redactionLogEntities,
|
||||
SectionGrid sectionGrid) throws IOException {
|
||||
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
document.save(byteArrayOutputStream);
|
||||
return RedactionResult.builder()
|
||||
.document(byteArrayOutputStream.toByteArray())
|
||||
.numberOfPages(numberOfPages)
|
||||
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService.getRulesVersion()))
|
||||
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService
|
||||
.getRulesVersion()))
|
||||
.sectionGrid(sectionGrid)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -28,4 +28,12 @@ public abstract class AbstractTextContainer {
|
||||
return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft().getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight();
|
||||
}
|
||||
|
||||
public float getHeight() {
|
||||
return maxY - minY;
|
||||
}
|
||||
|
||||
public float getWidth() {
|
||||
return maxX - minX;
|
||||
}
|
||||
|
||||
}
|
||||
@ -23,13 +23,16 @@ import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionType;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
@ -105,7 +108,6 @@ public class AnnotationHighlightService {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
boolean requestedToRemove = false;
|
||||
List<Comment> comments = null;
|
||||
|
||||
@ -170,7 +172,6 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -452,11 +453,27 @@ public class AnnotationHighlightService {
|
||||
continue;
|
||||
}
|
||||
if (textBlock instanceof TextBlock) {
|
||||
|
||||
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||
visualizeTextBlock((TextBlock) textBlock, contentStream);
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size()));
|
||||
|
||||
} else if (textBlock instanceof Table) {
|
||||
|
||||
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||
visualizeTable((Table) textBlock, contentStream);
|
||||
List<CellRectangle> cellRectangles = visualizeTable((Table) textBlock, contentStream);
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -482,8 +499,9 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
|
||||
private List<CellRectangle> visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
|
||||
|
||||
List<CellRectangle> cellRectangles = new ArrayList<>();
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
@ -493,6 +511,8 @@ public class AnnotationHighlightService {
|
||||
contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell
|
||||
.getHeight());
|
||||
contentStream.stroke();
|
||||
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -505,6 +525,8 @@ public class AnnotationHighlightService {
|
||||
contentStream.showText(table.getClassification());
|
||||
contentStream.endText();
|
||||
}
|
||||
|
||||
return cellRectangles;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user