RED-1010: Split redaction endpoint into analysis and annotation endpoints
This commit is contained in:
parent
40a9b39688
commit
d739a4f2f5
@ -0,0 +1,17 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class AnalyzeRequest {
|
||||
|
||||
private byte[] document;
|
||||
private String ruleSetId;
|
||||
private ManualRedactions manualRedactions;
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class AnalyzeResult {
|
||||
|
||||
private int numberOfPages;
|
||||
private RedactionLog redactionLog;
|
||||
private SectionGrid sectionGrid;
|
||||
|
||||
}
|
||||
@ -0,0 +1,17 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class AnnotateRequest {
|
||||
|
||||
private byte[] document;
|
||||
private RedactionLog redactionLog;
|
||||
private SectionGrid sectionGrid;
|
||||
}
|
||||
@ -0,0 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class AnnotateResponse {
|
||||
|
||||
private byte[] document;
|
||||
}
|
||||
@ -37,4 +37,7 @@ public class RedactionLogEntry {
|
||||
private String textBefore;
|
||||
private String textAfter;
|
||||
|
||||
@Builder.Default
|
||||
private List<Comment> comments = new ArrayList<>();
|
||||
|
||||
}
|
||||
|
||||
@ -13,6 +13,5 @@ public class RedactionRequest {
|
||||
|
||||
private byte[] document;
|
||||
private String ruleSetId;
|
||||
private boolean flatRedaction;
|
||||
private ManualRedactions manualRedactions;
|
||||
}
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.resources;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import org.springframework.http.MediaType;
|
||||
@ -14,8 +18,11 @@ public interface RedactionResource {
|
||||
String RULE_SET_PARAMETER_NAME = "ruleSetId";
|
||||
String RULE_SET_PATH_VARIABLE = "/{" + RULE_SET_PARAMETER_NAME + "}";
|
||||
|
||||
@PostMapping(value = "/redact", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
RedactionResult redact(@RequestBody RedactionRequest redactionRequest);
|
||||
@PostMapping(value = "/analyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest);
|
||||
|
||||
@PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest);
|
||||
|
||||
@PostMapping(value = "/debug/classifications", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
RedactionResult classify(@RequestBody RedactionRequest redactionRequest);
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.controller;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
@ -14,10 +18,13 @@ import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedacti
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.AnnotationHighlightService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
@ -35,34 +42,30 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
private final PdfVisualisationService pdfVisualisationService;
|
||||
private final PdfSegmentationService pdfSegmentationService;
|
||||
private final AnnotationHighlightService annotationHighlightService;
|
||||
private final RedactionLogCreatorService redactionLogCreatorService;
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final AnnotationService annotationService;
|
||||
|
||||
|
||||
@Override
|
||||
public RedactionResult redact(@RequestBody RedactionRequest redactionRequest) {
|
||||
public AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(analyzeRequest.getDocument()))) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
|
||||
log.info("Document structure analysis successful, starting redaction analysis...");
|
||||
|
||||
entityRedactionService.processDocument(classifiedDoc, redactionRequest.getRuleSetId(), redactionRequest.getManualRedactions());
|
||||
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest
|
||||
.getManualRedactions(), redactionRequest.getRuleSetId());
|
||||
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
|
||||
redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest
|
||||
.getRuleSetId());
|
||||
|
||||
log.info("Redaction analysis successful...");
|
||||
|
||||
return convert(pdDocument,
|
||||
classifiedDoc.getPages().size(),
|
||||
classifiedDoc.getRedactionLogEntities(),
|
||||
classifiedDoc.getSectionGrid(),
|
||||
classifiedDoc.getDictionaryVersion(),
|
||||
classifiedDoc.getRulesVersion(),
|
||||
redactionRequest.getRuleSetId());
|
||||
return AnalyzeResult.builder().sectionGrid(classifiedDoc.getSectionGrid()).redactionLog(new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc
|
||||
.getRulesVersion(), analyzeRequest.getRuleSetId())).build();
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
@ -71,6 +74,25 @@ public class RedactionController implements RedactionResource {
|
||||
}
|
||||
|
||||
|
||||
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(annotateRequest.getDocument()))) {
|
||||
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
annotationService.annotate(pdDocument, annotateRequest.getRedactionLog(), annotateRequest.getSectionGrid());
|
||||
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
pdDocument.save(byteArrayOutputStream);
|
||||
return AnnotateResponse.builder().document(byteArrayOutputStream.toByteArray()).build();
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) {
|
||||
|
||||
@ -136,11 +158,14 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
@Override
|
||||
public void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId) {
|
||||
|
||||
droolsExecutionService.updateRules(ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void testRules(@RequestBody String rules) {
|
||||
|
||||
droolsExecutionService.testRules(rules);
|
||||
}
|
||||
|
||||
@ -152,8 +177,8 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
|
||||
private RedactionResult convert(PDDocument document, int numberOfPages,
|
||||
List<RedactionLogEntry> redactionLogEntities,
|
||||
SectionGrid sectionGrid, long dictionaryVersion, long rulesVersion, String ruleSetId) throws IOException {
|
||||
List<RedactionLogEntry> redactionLogEntities, SectionGrid sectionGrid,
|
||||
long dictionaryVersion, long rulesVersion, String ruleSetId) throws IOException {
|
||||
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
document.save(byteArrayOutputStream);
|
||||
|
||||
@ -0,0 +1,269 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class AnnotationService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
|
||||
|
||||
public void annotate(PDDocument document, RedactionLog redactionLog, SectionGrid sectionGrid) throws IOException {
|
||||
|
||||
Map<Integer, List<RedactionLogEntry>> redactionLogPerPage = convertRedactionLog(redactionLog);
|
||||
|
||||
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
||||
|
||||
PDPage pdPage = document.getPage(page - 1);
|
||||
|
||||
List<SectionRectangle> sectionRectangles = sectionGrid.getRectanglesPerPage().get(page);
|
||||
if (sectionRectangles != null && !sectionRectangles.isEmpty()) {
|
||||
drawSectionGrid(document, pdPage, sectionRectangles);
|
||||
}
|
||||
|
||||
List<RedactionLogEntry> logEntries = redactionLogPerPage.get(page);
|
||||
if (logEntries != null && !logEntries.isEmpty()) {
|
||||
addAnnotations(logEntries, pdPage, page, redactionLog.getRuleSetId());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void addAnnotations(List<RedactionLogEntry> logEntries, PDPage pdPage, int page,
|
||||
String ruleSetId) throws IOException {
|
||||
|
||||
List<PDAnnotation> annotations = pdPage.getAnnotations();
|
||||
|
||||
for (RedactionLogEntry entry : logEntries) {
|
||||
annotations.addAll(createAnnotation(entry, page, ruleSetId, pdPage.getMediaBox(), pdPage.getCropBox()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private List<PDAnnotation> createAnnotation(RedactionLogEntry redactionLogEntry, int page, String ruleSetId,
|
||||
PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
List<PDAnnotation> annotations = new ArrayList<>();
|
||||
|
||||
List<Rectangle> rectangles = redactionLogEntry.getPositions()
|
||||
.stream()
|
||||
.filter(pos -> pos.getPage() == page)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (rectangles.isEmpty()) {
|
||||
return annotations;
|
||||
}
|
||||
|
||||
PDAnnotationTextMarkup annotation = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
|
||||
annotation.constructAppearances();
|
||||
PDRectangle pdRectangle = toPDRectangle(rectangles, mediaBox, cropBox);
|
||||
annotation.setRectangle(pdRectangle);
|
||||
annotation.setQuadPoints(toQuadPoints(rectangles, mediaBox, cropBox));
|
||||
if (!dictionaryService.isHint(redactionLogEntry.getType(), ruleSetId)) {
|
||||
annotation.setContents(createAnnotationContent(redactionLogEntry));
|
||||
}
|
||||
annotation.setTitlePopup(redactionLogEntry.getId());
|
||||
annotation.setAnnotationName(redactionLogEntry.getId());
|
||||
annotation.setColor(new PDColor(redactionLogEntry.getColor(), PDDeviceRGB.INSTANCE));
|
||||
annotations.add(annotation);
|
||||
|
||||
if (redactionLogEntry.getComments() != null) {
|
||||
for (Comment comment : redactionLogEntry.getComments()) {
|
||||
PDAnnotationText txtAnnot = new PDAnnotationText();
|
||||
txtAnnot.setAnnotationName(comment.getId());
|
||||
txtAnnot.setInReplyTo(annotation); // Reference to highlight annotation
|
||||
txtAnnot.setName(PDAnnotationText.NAME_COMMENT);
|
||||
txtAnnot.setCreationDate(GregorianCalendar.from(comment.getDate().toZonedDateTime()));
|
||||
txtAnnot.setTitlePopup(comment.getUser());
|
||||
txtAnnot.setContents(comment.getText());
|
||||
txtAnnot.setRectangle(pdRectangle);
|
||||
annotations.add(txtAnnot);
|
||||
}
|
||||
}
|
||||
|
||||
return annotations;
|
||||
}
|
||||
|
||||
|
||||
private String createAnnotationContent(RedactionLogEntry redactionLogEntry) {
|
||||
|
||||
if (redactionLogEntry.isManual()) {
|
||||
return "\nManual Redaction\n\nIn Section : \"" + redactionLogEntry.getSection() + "\"";
|
||||
}
|
||||
return "\nRule " + redactionLogEntry.getMatchedRule() + " matched\n\n" + redactionLogEntry.getReason() + "\n\nLegal basis:" + redactionLogEntry
|
||||
.getLegalBasis() + "\n\nIn section: \"" + redactionLogEntry.getSection() + "\"";
|
||||
}
|
||||
|
||||
|
||||
private PDRectangle toPDRectangle(List<Rectangle> rectangles, PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
float lowerLeftX = Float.MAX_VALUE;
|
||||
float upperRightX = 0;
|
||||
float lowerLeftY = 0;
|
||||
float upperRightY = Float.MAX_VALUE;
|
||||
|
||||
for (Rectangle rectangle : rectangles) {
|
||||
if (rectangle.getTopLeft().getX() < lowerLeftX) {
|
||||
lowerLeftX = rectangle.getTopLeft().getX();
|
||||
}
|
||||
if (rectangle.getTopLeft().getX() + rectangle.getWidth() > upperRightX) {
|
||||
upperRightX = rectangle.getTopLeft().getX() + rectangle.getWidth();
|
||||
}
|
||||
if (rectangle.getTopLeft().getY() + rectangle.getHeight() > lowerLeftY) {
|
||||
lowerLeftY = rectangle.getTopLeft().getY() + rectangle.getHeight();
|
||||
}
|
||||
if (rectangle.getTopLeft().getY() < upperRightY) {
|
||||
upperRightY = rectangle.getTopLeft().getY();
|
||||
}
|
||||
}
|
||||
|
||||
var x1 = lowerLeftX + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y1 = lowerLeftY + (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
var x2 = upperRightX + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y2 = upperRightY - (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
|
||||
PDRectangle annotationPosition = new PDRectangle();
|
||||
annotationPosition.setLowerLeftX(x1);
|
||||
annotationPosition.setLowerLeftY(y1);
|
||||
annotationPosition.setUpperRightX(x2);
|
||||
annotationPosition.setUpperRightY(y2);
|
||||
return annotationPosition;
|
||||
}
|
||||
|
||||
|
||||
private float[] toQuadPoints(List<Rectangle> rectangles, PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
float[] quadPoints = new float[rectangles.size() * 8];
|
||||
int i = 0;
|
||||
|
||||
for (Rectangle rectangle : rectangles) {
|
||||
float[] quadPoint = toQuadPoint(rectangle, mediaBox, cropBox);
|
||||
for (int j = 0; j <= 7; j++) {
|
||||
quadPoints[i + j] = quadPoint[j];
|
||||
}
|
||||
i += 8;
|
||||
}
|
||||
return quadPoints;
|
||||
}
|
||||
|
||||
|
||||
private float[] toQuadPoint(Rectangle rectangle, PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
var x1 = rectangle.getTopLeft().getX() + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y1 = rectangle.getTopLeft().getY() + (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
var x2 = rectangle.getTopLeft()
|
||||
.getX() + rectangle.getWidth() + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y2 = rectangle.getTopLeft()
|
||||
.getY() + rectangle.getHeight() - (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
|
||||
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
|
||||
// of the area to be highlighted
|
||||
return new float[]{x1, y1, x2, y2, x1, y2 - rectangle.getHeight(), x2, y1 - rectangle.getHeight()};
|
||||
}
|
||||
|
||||
|
||||
private void drawSectionGrid(PDDocument document, PDPage pdPage,
|
||||
List<SectionRectangle> sectionRectangles) throws IOException {
|
||||
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
for (SectionRectangle sectionRectangle : sectionRectangles) {
|
||||
drawSectionRectangle(contentStream, sectionRectangle);
|
||||
drawSectionPartNumberText(contentStream, sectionRectangle);
|
||||
drawTableCells(contentStream, sectionRectangle);
|
||||
}
|
||||
contentStream.close();
|
||||
}
|
||||
|
||||
|
||||
private void drawSectionRectangle(PDPageContentStream contentStream,
|
||||
SectionRectangle sectionRectangle) throws IOException {
|
||||
|
||||
contentStream.setStrokingColor(Color.LIGHT_GRAY);
|
||||
contentStream.setLineWidth(0.5f);
|
||||
contentStream.addRect(sectionRectangle.getTopLeft().getX(), sectionRectangle.getTopLeft()
|
||||
.getY(), sectionRectangle.getWidth(), sectionRectangle.getHeight());
|
||||
contentStream.stroke();
|
||||
}
|
||||
|
||||
|
||||
private void drawSectionPartNumberText(PDPageContentStream contentStream,
|
||||
SectionRectangle sectionRectangle) throws IOException {
|
||||
|
||||
contentStream.beginText();
|
||||
contentStream.setNonStrokingColor(Color.DARK_GRAY);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 8f);
|
||||
if (sectionRectangle.getTableCells() == null) {
|
||||
contentStream.newLineAtOffset(sectionRectangle.getTopLeft().getX(), sectionRectangle.getTopLeft()
|
||||
.getY() + sectionRectangle.getHeight());
|
||||
} else {
|
||||
contentStream.newLineAtOffset(sectionRectangle.getTopLeft().getX(), sectionRectangle.getTopLeft().getY());
|
||||
}
|
||||
contentStream.showText(sectionRectangle.getPart() + "/" + sectionRectangle.getNumberOfParts());
|
||||
contentStream.endText();
|
||||
}
|
||||
|
||||
|
||||
private void drawTableCells(PDPageContentStream contentStream,
|
||||
SectionRectangle sectionRectangle) throws IOException {
|
||||
|
||||
if (sectionRectangle.getTableCells() != null) {
|
||||
for (CellRectangle cell : sectionRectangle.getTableCells()) {
|
||||
contentStream.setLineWidth(0.5f);
|
||||
contentStream.setStrokingColor(Color.CYAN);
|
||||
contentStream.addRect(cell.getTopLeft().getX(), cell.getTopLeft()
|
||||
.getY(), cell.getWidth(), cell.getHeight());
|
||||
contentStream.stroke();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Map<Integer, List<RedactionLogEntry>> convertRedactionLog(RedactionLog redactionLog) {
|
||||
|
||||
Map<Integer, List<RedactionLogEntry>> redactionLogPerPage = new HashMap<>();
|
||||
if (redactionLog == null) {
|
||||
return redactionLogPerPage;
|
||||
}
|
||||
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
|
||||
int page = 0;
|
||||
for (Rectangle position : entry.getPositions()) {
|
||||
if (position.getPage() != page) {
|
||||
redactionLogPerPage.computeIfAbsent(position.getPage(), x -> new ArrayList<>()).add(entry);
|
||||
page = position.getPage();
|
||||
}
|
||||
}
|
||||
}
|
||||
return redactionLogPerPage;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
||||
@ -17,58 +27,34 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class AnnotationHighlightService {
|
||||
public class RedactionLogCreatorService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
|
||||
|
||||
public void highlight(PDDocument document, Document classifiedDoc, boolean flatRedaction,
|
||||
ManualRedactions manualRedactions, String ruleSetId) throws IOException {
|
||||
public void createRedactionLog(Document classifiedDoc, int numberOfPages, ManualRedactions manualRedactions,
|
||||
String ruleSetId) {
|
||||
|
||||
Set<Integer> manualRedactionPages = getManualRedactionPages(manualRedactions);
|
||||
|
||||
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
||||
for (int page = 1; page <= numberOfPages; page++) {
|
||||
|
||||
PDPage pdPage = document.getPage(page - 1);
|
||||
|
||||
drawSectionFrames(document, classifiedDoc, flatRedaction, pdPage, page);
|
||||
addSectionGrid(classifiedDoc, page);
|
||||
|
||||
if (classifiedDoc.getEntities().get(page) != null) {
|
||||
addAnnotations(pdPage, classifiedDoc, flatRedaction, manualRedactions, page, ruleSetId);
|
||||
addEntries(classifiedDoc, manualRedactions, page, ruleSetId);
|
||||
}
|
||||
|
||||
if (manualRedactionPages.contains(page)) {
|
||||
addManualAnnotations(pdPage, classifiedDoc, manualRedactions, page, ruleSetId);
|
||||
addManualEntries(classifiedDoc, manualRedactions, page, ruleSetId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -91,10 +77,7 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private void addAnnotations(PDPage pdPage, Document classifiedDoc, boolean flatRedaction,
|
||||
ManualRedactions manualRedactions, int page, String ruleSetId) throws IOException {
|
||||
|
||||
List<PDAnnotation> annotations = pdPage.getAnnotations();
|
||||
private void addEntries(Document classifiedDoc, ManualRedactions manualRedactions, int page, String ruleSetId) {
|
||||
|
||||
// Duplicates can exist due table extraction colums over multiple rows.
|
||||
Set<String> processedIds = new HashSet<>();
|
||||
@ -102,11 +85,6 @@ public class AnnotationHighlightService {
|
||||
entityLoop:
|
||||
for (Entity entity : classifiedDoc.getEntities().get(page)) {
|
||||
|
||||
if (flatRedaction && !isRedactionType(entity, ruleSetId)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
boolean requestedToRemove = false;
|
||||
List<Comment> comments = null;
|
||||
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
@ -131,7 +109,6 @@ public class AnnotationHighlightService {
|
||||
redactionLogEntry.setStatus(Status.APPROVED);
|
||||
manualOverrideReason = entity.getRedactionReason() + ", removed by manual override";
|
||||
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
|
||||
requestedToRemove = true;
|
||||
manualOverrideReason = entity.getRedactionReason() + ", requested to remove";
|
||||
redactionLogEntry.setStatus(Status.REQUESTED);
|
||||
} else {
|
||||
@ -157,9 +134,9 @@ public class AnnotationHighlightService {
|
||||
comments = manualRedactions.getComments().get(entityPositionSequence.getId());
|
||||
}
|
||||
|
||||
redactionLogEntry.setComments(comments);
|
||||
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
|
||||
|
||||
annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, ruleSetId, requestedToRemove), comments, !isHint(entity, ruleSetId), pdPage.getMediaBox(), pdPage.getCropBox()));
|
||||
}
|
||||
|
||||
redactionLogEntry.setId(entityPositionSequence.getId());
|
||||
@ -169,9 +146,7 @@ public class AnnotationHighlightService {
|
||||
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -200,15 +175,13 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private void addManualAnnotations(PDPage pdPage, Document classifiedDoc, ManualRedactions manualRedactions,
|
||||
int page, String ruleSetId) throws IOException {
|
||||
private void addManualEntries(Document classifiedDoc, ManualRedactions manualRedactions, int page,
|
||||
String ruleSetId) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<PDAnnotation> annotations = pdPage.getAnnotations();
|
||||
|
||||
for (ManualRedactionEntry manualRedactionEntry : manualRedactions.getEntriesToAdd()) {
|
||||
|
||||
String id = manualRedactionEntry.getId();
|
||||
@ -223,9 +196,8 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
}
|
||||
|
||||
redactionLogEntry.setComments(manualRedactions.getComments().get(id));
|
||||
if (!rectanglesOnPage.isEmpty() && !approvedAndShouldBeInDictionary(manualRedactionEntry)) {
|
||||
annotations.addAll(createAnnotation(rectanglesOnPage, id, createAnnotationContent(manualRedactionEntry), getColorForManualAdd(manualRedactionEntry
|
||||
.getType(), ruleSetId, manualRedactionEntry.getStatus()), manualRedactions.getComments().get(id), true, pdPage.getMediaBox(), pdPage.getCropBox()));
|
||||
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
||||
}
|
||||
}
|
||||
@ -238,11 +210,12 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id, String ruleSetId) {
|
||||
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id,
|
||||
String ruleSetId) {
|
||||
|
||||
return RedactionLogEntry.builder()
|
||||
.id(id)
|
||||
.color(getColor(manualRedactionEntry.getType(), ruleSetId))
|
||||
.color(getColorForManualAdd(manualRedactionEntry.getType(), ruleSetId, manualRedactionEntry.getStatus()))
|
||||
.reason(manualRedactionEntry.getReason())
|
||||
.legalBasis(manualRedactionEntry.getLegalBasis())
|
||||
.value(manualRedactionEntry.getValue())
|
||||
@ -280,139 +253,6 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private List<PDAnnotation> createAnnotation(List<Rectangle> rectangles, String id, String content, float[] color,
|
||||
List<Comment> comments, boolean popup, PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
List<PDAnnotation> annotations = new ArrayList<>();
|
||||
|
||||
PDAnnotationTextMarkup annotation = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
|
||||
annotation.constructAppearances();
|
||||
PDRectangle pdRectangle = toPDRectangle(rectangles, mediaBox, cropBox);
|
||||
annotation.setRectangle(pdRectangle);
|
||||
annotation.setQuadPoints(toQuadPoints(rectangles, mediaBox, cropBox));
|
||||
if (popup) {
|
||||
annotation.setContents(content);
|
||||
}
|
||||
annotation.setTitlePopup(id);
|
||||
annotation.setAnnotationName(id);
|
||||
annotation.setColor(new PDColor(color, PDDeviceRGB.INSTANCE));
|
||||
annotations.add(annotation);
|
||||
|
||||
if (comments != null) {
|
||||
for (Comment comment : comments) {
|
||||
PDAnnotationText txtAnnot = new PDAnnotationText();
|
||||
txtAnnot.setAnnotationName(comment.getId());
|
||||
txtAnnot.setInReplyTo(annotation); // Reference to highlight annotation
|
||||
txtAnnot.setName(PDAnnotationText.NAME_COMMENT);
|
||||
txtAnnot.setCreationDate(GregorianCalendar.from(comment.getDate().toZonedDateTime()));
|
||||
txtAnnot.setTitlePopup(comment.getUser());
|
||||
txtAnnot.setContents(comment.getText());
|
||||
txtAnnot.setRectangle(pdRectangle);
|
||||
annotations.add(txtAnnot);
|
||||
}
|
||||
}
|
||||
|
||||
return annotations;
|
||||
}
|
||||
|
||||
|
||||
private String createAnnotationContent(Entity entity) {
|
||||
|
||||
return "\nRule " + entity.getMatchedRule() + " matched\n\n" + entity.getRedactionReason() + "\n\nLegal basis:" + entity
|
||||
.getLegalBasis() + "\n\nIn section: \"" + entity.getHeadline() + "\"";
|
||||
}
|
||||
|
||||
|
||||
private String createAnnotationContent(ManualRedactionEntry entry) {
|
||||
|
||||
return "\nManual Redaction\n\nIn Section : \"" + entry.getSection() + "\"";
|
||||
}
|
||||
|
||||
|
||||
private PDRectangle toPDRectangle(List<Rectangle> rectangles, PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
float lowerLeftX = Float.MAX_VALUE;
|
||||
float upperRightX = 0;
|
||||
float lowerLeftY = 0;
|
||||
float upperRightY = Float.MAX_VALUE;
|
||||
|
||||
for (Rectangle rectangle : rectangles) {
|
||||
if (rectangle.getTopLeft().getX() < lowerLeftX) {
|
||||
lowerLeftX = rectangle.getTopLeft().getX();
|
||||
}
|
||||
if (rectangle.getTopLeft().getX() + rectangle.getWidth() > upperRightX) {
|
||||
upperRightX = rectangle.getTopLeft().getX() + rectangle.getWidth();
|
||||
}
|
||||
if (rectangle.getTopLeft().getY() + rectangle.getHeight() > lowerLeftY) {
|
||||
lowerLeftY = rectangle.getTopLeft().getY() + rectangle.getHeight();
|
||||
}
|
||||
if (rectangle.getTopLeft().getY() < upperRightY) {
|
||||
upperRightY = rectangle.getTopLeft().getY();
|
||||
}
|
||||
}
|
||||
|
||||
var x1 = lowerLeftX + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y1 = lowerLeftY + (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
var x2 = upperRightX + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y2 = upperRightY - (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
|
||||
PDRectangle annotationPosition = new PDRectangle();
|
||||
annotationPosition.setLowerLeftX(x1);
|
||||
annotationPosition.setLowerLeftY(y1);
|
||||
annotationPosition.setUpperRightX(x2);
|
||||
annotationPosition.setUpperRightY(y2);
|
||||
return annotationPosition;
|
||||
}
|
||||
|
||||
|
||||
private float[] toQuadPoints(List<Rectangle> rectangles,PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
float[] quadPoints = new float[rectangles.size() * 8];
|
||||
int i = 0;
|
||||
|
||||
for (Rectangle rectangle : rectangles) {
|
||||
float[] quadPoint = toQuadPoint(rectangle, mediaBox, cropBox);
|
||||
for (int j = 0; j <= 7; j++) {
|
||||
quadPoints[i + j] = quadPoint[j];
|
||||
}
|
||||
i += 8;
|
||||
}
|
||||
return quadPoints;
|
||||
}
|
||||
|
||||
|
||||
private float[] toQuadPoint(Rectangle rectangle, PDRectangle mediaBox, PDRectangle cropBox) {
|
||||
|
||||
var x1 = rectangle.getTopLeft().getX() + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y1 = rectangle.getTopLeft().getY() + (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
var x2 = rectangle.getTopLeft().getX() + rectangle.getWidth() + cropBox.getLowerLeftX() - mediaBox.getLowerLeftY();
|
||||
var y2 = rectangle.getTopLeft().getY() + rectangle.getHeight() - (mediaBox.getLowerLeftY() - cropBox.getLowerLeftY());
|
||||
|
||||
|
||||
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
|
||||
// of the area to be highlighted
|
||||
return new float[]{
|
||||
x1,
|
||||
y1,
|
||||
x2,
|
||||
y2,
|
||||
x1,
|
||||
y2 - rectangle.getHeight(),
|
||||
x2,
|
||||
y1 - rectangle.getHeight()
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
private boolean isRedactionType(Entity entity, String ruleSetId) {
|
||||
|
||||
if (!entity.isRedaction()) {
|
||||
return false;
|
||||
}
|
||||
return !isHint(entity, ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(Entity entity, String ruleSetId, boolean requestedToRemove) {
|
||||
|
||||
if (requestedToRemove) {
|
||||
@ -437,27 +277,25 @@ public class AnnotationHighlightService {
|
||||
|
||||
|
||||
private float[] getColor(String type, String ruleSetId) {
|
||||
|
||||
return dictionaryService.getColor(type, ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private boolean isHint(Entity entity, String ruleSetId) {
|
||||
|
||||
return dictionaryService.isHint(entity.getType(), ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private boolean isRecommendation(Entity entity, String ruleSetId) {
|
||||
|
||||
return dictionaryService.isRecommendation(entity.getType(), ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private void drawSectionFrames(PDDocument document, Document classifiedDoc, boolean flatRedaction, PDPage pdPage,
|
||||
int page) throws IOException {
|
||||
private void addSectionGrid(Document classifiedDoc, int page) {
|
||||
|
||||
if (flatRedaction) {
|
||||
return;
|
||||
}
|
||||
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
|
||||
@ -467,10 +305,8 @@ public class AnnotationHighlightService {
|
||||
if (textBlock.getPage() != page) {
|
||||
continue;
|
||||
}
|
||||
if (textBlock instanceof TextBlock) {
|
||||
|
||||
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||
visualizeTextBlock((TextBlock) textBlock, contentStream);
|
||||
if (textBlock instanceof TextBlock) {
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
@ -480,8 +316,15 @@ public class AnnotationHighlightService {
|
||||
|
||||
} else if (textBlock instanceof Table) {
|
||||
|
||||
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||
List<CellRectangle> cellRectangles = visualizeTable((Table) textBlock, contentStream);
|
||||
List<CellRectangle> cellRectangles = new ArrayList<>();
|
||||
for (List<Cell> row : ((Table) textBlock).getRows()) {
|
||||
for (Cell cell : row) {
|
||||
if (cell != null) {
|
||||
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
@ -492,56 +335,6 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
}
|
||||
}
|
||||
contentStream.close();
|
||||
}
|
||||
|
||||
|
||||
private void visualizeTextBlock(TextBlock textBlock, PDPageContentStream contentStream) throws IOException {
|
||||
|
||||
contentStream.setStrokingColor(Color.LIGHT_GRAY);
|
||||
contentStream.setLineWidth(0.5f);
|
||||
contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
|
||||
contentStream.stroke();
|
||||
|
||||
if (textBlock.getClassification() != null) {
|
||||
contentStream.beginText();
|
||||
contentStream.setNonStrokingColor(Color.DARK_GRAY);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 8f);
|
||||
contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
|
||||
contentStream.showText(textBlock.getClassification());
|
||||
contentStream.endText();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private List<CellRectangle> visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
|
||||
|
||||
List<CellRectangle> cellRectangles = new ArrayList<>();
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (cell != null) {
|
||||
contentStream.setLineWidth(0.5f);
|
||||
contentStream.setStrokingColor(Color.CYAN);
|
||||
contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell
|
||||
.getHeight());
|
||||
contentStream.stroke();
|
||||
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (table.getClassification() != null) {
|
||||
contentStream.beginText();
|
||||
contentStream.setNonStrokingColor(Color.DARK_GRAY);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 8f);
|
||||
contentStream.newLineAtOffset(table.getMinX(), table.getMinY());
|
||||
contentStream.showText(table.getClassification());
|
||||
contentStream.endText();
|
||||
}
|
||||
|
||||
return cellRectangles;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,42 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.TestConfiguration;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
@ -57,9 +23,47 @@ import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.TestConfiguration;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = RANDOM_PORT)
|
||||
@ -376,12 +380,12 @@ public class RedactionIntegrationTest {
|
||||
input.addAll(getPathsRecursively(file));
|
||||
}
|
||||
for (File path : input) {
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(new FileInputStream(path)))
|
||||
.build();
|
||||
System.out.println("Redacting file : " + path.getName());
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||
@ -423,25 +427,23 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Lambda-cyhalothrin - Toxicokinetics - Rat - Spain - 2006.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
request.setFlatRedaction(false);
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
// result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||
// if(!entry.isHint()){
|
||||
// System.out.println(entry.getPositions().get(0).getPage() +":"+ entry.getTextBefore() +"--->"+ entry.getValue() + "--->" + entry.getTextAfter());
|
||||
// }
|
||||
// });
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).redactionLog(result.getRedactionLog()).sectionGrid(result.getSectionGrid()).build());
|
||||
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
@ -456,15 +458,19 @@ public class RedactionIntegrationTest {
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).redactionLog(result.getRedactionLog()).sectionGrid(result.getSectionGrid()).build());
|
||||
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
@ -508,16 +514,20 @@ public class RedactionIntegrationTest {
|
||||
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.manualRedactions(manualRedactions)
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).redactionLog(result.getRedactionLog()).sectionGrid(result.getSectionGrid()).build());
|
||||
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
@ -603,13 +613,13 @@ public class RedactionIntegrationTest {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
request.setFlatRedaction(false);
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||
if (!entry.isHint()) {
|
||||
@ -625,16 +635,19 @@ public class RedactionIntegrationTest {
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.flatRedaction(false)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).redactionLog(result.getRedactionLog()).sectionGrid(result.getSectionGrid()).build());
|
||||
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user