Pull request #382: Pdftron api cleanup

Merge in RED/redaction-service from pdftron-api-cleanup to master

* commit 'f666a0330161daed8d3f59189bbe258327926995':
  RED-3800 code cleanup
  RED-3800 code cleanup
This commit is contained in:
Timo Bejan 2022-05-03 12:26:22 +02:00
commit 314f7ebdd9
8 changed files with 103 additions and 86 deletions

View File

@ -12,7 +12,7 @@
<artifactId>redaction-service-api-v1</artifactId>
<properties>
<persistence-service.version>1.143.0</persistence-service.version>
<persistence-service.version>1.166.0</persistence-service.version>
</properties>
<dependencies>

View File

@ -11,10 +11,6 @@ import org.springframework.web.bind.annotation.RequestParam;
public interface RedactionResource {
@PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest);
@PostMapping(value = "/debug/classifications", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionResult classify(@RequestBody RedactionRequest redactionRequest);

View File

@ -16,7 +16,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
@ -33,42 +32,11 @@ public class RedactionController implements RedactionResource {
private final PdfVisualisationService pdfVisualisationService;
private final DroolsExecutionService droolsExecutionService;
private final DictionaryService dictionaryService;
private final AnnotationService annotationService;
private final PdfSegmentationService pdfSegmentationService;
private final RedactionStorageService redactionStorageService;
private final RedactionLogMergeService redactionLogMergeService;
private final ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
var mergedRedactionLog = getRedactionLog(RedactionRequest.builder()
.fileId(annotateRequest.getFileId())
.manualRedactions(annotateRequest.getManualRedactions())
.dossierId(annotateRequest.getDossierId())
.dossierTemplateId(annotateRequest.getDossierTemplateId())
.build());
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getDossierId(), annotateRequest.getFileId());
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
pdDocument.setAllSecurityToBeRemoved(true);
dictionaryService.updateDictionary(annotateRequest.getDossierTemplateId(), annotateRequest.getDossierId());
annotationService.annotate(pdDocument, mergedRedactionLog, sectionsGrid);
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
pdDocument.save(byteArrayOutputStream);
return AnnotateResponse.builder().document(byteArrayOutputStream.toByteArray()).build();
}
} catch (Exception e) {
throw new RedactionException(e);
}
}
@Override
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
@ -157,39 +125,8 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionLog getRedactionLog(RedactionRequest redactionRequest) {
log.debug("Requested preview for: {}", redactionRequest);
var redactionLog = redactionStorageService.getRedactionLog(redactionRequest.getDossierId(), redactionRequest.getFileId());
if (redactionLog == null) {
throw new NotFoundException("RedactionLog not present");
}
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest.getFileId());
if (sectionGrid.getSections().isEmpty()) {
log.info("SectionGrid does not have headlines set. Computing headlines now!");
var text = redactionStorageService.getText(redactionRequest.getDossierId(), redactionRequest.getFileId());
// enhance section grid with headline data
for (var sectionText : text.getSectionTexts()) {
sectionGrid.getSections()
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
.stream()
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas()));
}
redactionStorageService.storeObject(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.SECTION_GRID, sectionGrid);
}
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
var merged = redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages(), redactionRequest.getTypes(), redactionRequest.getColors());
merged.getRedactionLogEntry().removeIf(e -> e.isFalsePositive() && !redactionRequest.isIncludeFalsePositives());
return merged;
return redactionLogMergeService.provideRedactionLog(redactionRequest);
}

View File

@ -5,10 +5,13 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Commen
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.RequiredArgsConstructor;
@ -29,8 +32,45 @@ public class RedactionLogMergeService {
private final SectionTextService sectionTextService;
private final RedactionStorageService redactionStorageService;
public RedactionLog mergeRedactionLogData(RedactionLog redactionLog, SectionGrid sectionGrid, ManualRedactions manualRedactions,
public RedactionLog provideRedactionLog(RedactionRequest redactionRequest) {
log.debug("Requested preview for: {}", redactionRequest);
var redactionLog = redactionStorageService.getRedactionLog(redactionRequest.getDossierId(), redactionRequest.getFileId());
if (redactionLog == null) {
throw new NotFoundException("RedactionLog not present");
}
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest.getFileId());
if (sectionGrid.getSections().isEmpty()) {
log.info("SectionGrid does not have headlines set. Computing headlines now!");
var text = redactionStorageService.getText(redactionRequest.getDossierId(), redactionRequest.getFileId());
// enhance section grid with headline data
for (var sectionText : text.getSectionTexts()) {
sectionGrid.getSections()
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
.stream()
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas()));
}
redactionStorageService.storeObject(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.SECTION_GRID, sectionGrid);
}
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
var merged = mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages(), redactionRequest.getTypes(), redactionRequest.getColors());
merged.getRedactionLogEntry().removeIf(e -> e.isFalsePositive() && !redactionRequest.isIncludeFalsePositives());
return merged;
}
private RedactionLog mergeRedactionLogData(RedactionLog redactionLog, SectionGrid sectionGrid, ManualRedactions manualRedactions,
Set<Integer> excludedPages, List<Type> types, Colors colors) {
var skippedImportedRedactions = new HashSet<>();

View File

@ -13,6 +13,9 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.do
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
@ -91,6 +94,9 @@ public class RedactionIntegrationTest {
@Autowired
private RedactionController redactionController;
@Autowired
private AnnotationService annotationService;
@Autowired
private AnalyzeService analyzeService;
@ -279,7 +285,7 @@ public class RedactionIntegrationTest {
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -516,7 +522,7 @@ public class RedactionIntegrationTest {
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -580,7 +586,7 @@ public class RedactionIntegrationTest {
assertThat(changes.size()).isEqualTo(2);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -687,7 +693,7 @@ public class RedactionIntegrationTest {
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -722,7 +728,7 @@ public class RedactionIntegrationTest {
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -812,7 +818,7 @@ public class RedactionIntegrationTest {
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -946,7 +952,7 @@ public class RedactionIntegrationTest {
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -1078,7 +1084,7 @@ public class RedactionIntegrationTest {
analyzeService.reanalyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
@ -1102,7 +1108,7 @@ public class RedactionIntegrationTest {
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -1179,7 +1185,7 @@ public class RedactionIntegrationTest {
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.fileId(TEST_FILE_ID)
@ -1526,7 +1532,7 @@ public class RedactionIntegrationTest {
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -1555,7 +1561,7 @@ public class RedactionIntegrationTest {
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.model;
package com.iqser.red.service.redaction.v1.server.annotate;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.model;
package com.iqser.red.service.redaction.v1.server.annotate;
import lombok.AllArgsConstructor;
import lombok.Builder;

View File

@ -1,9 +1,15 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
package com.iqser.red.service.redaction.v1.server.annotate;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogMergeService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
@ -17,6 +23,7 @@ import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.springframework.stereotype.Service;
import java.awt.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.*;
@ -26,8 +33,39 @@ import java.util.stream.Collectors;
@RequiredArgsConstructor
public class AnnotationService {
private final RedactionStorageService redactionStorageService;
private final DictionaryService dictionaryService;
public void annotate(PDDocument document, RedactionLog redactionLog, SectionGrid sectionGrid) throws IOException {
private final RedactionLogMergeService redactionLogMergeService;
public AnnotateResponse annotate(AnnotateRequest annotateRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
var mergedRedactionLog = redactionLogMergeService.provideRedactionLog(RedactionRequest.builder()
.fileId(annotateRequest.getFileId())
.manualRedactions(annotateRequest.getManualRedactions())
.dossierId(annotateRequest.getDossierId())
.dossierTemplateId(annotateRequest.getDossierTemplateId())
.build());
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getDossierId(), annotateRequest.getFileId());
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
pdDocument.setAllSecurityToBeRemoved(true);
dictionaryService.updateDictionary(annotateRequest.getDossierTemplateId(), annotateRequest.getDossierId());
annotate(pdDocument, mergedRedactionLog, sectionsGrid);
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
pdDocument.save(byteArrayOutputStream);
return AnnotateResponse.builder().document(byteArrayOutputStream.toByteArray()).build();
}
} catch (Exception e) {
throw new RedactionException(e);
}
}
private void annotate(PDDocument document, RedactionLog redactionLog, SectionGrid sectionGrid) throws IOException {
Map<Integer, List<RedactionLogEntry>> redactionLogPerPage = convertRedactionLog(redactionLog);