diff --git a/redaction-service-v1/pom.xml b/redaction-service-v1/pom.xml index ee796600..eddd4fbb 100644 --- a/redaction-service-v1/pom.xml +++ b/redaction-service-v1/pom.xml @@ -5,7 +5,7 @@ platform-dependency com.iqser.red - 1.1.2 + 1.1.3 4.0.0 @@ -32,7 +32,7 @@ com.iqser.red platform-commons-dependency - 1.3.1 + 1.3.6 import pom diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java index 4aa290e9..7a891277 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java @@ -6,6 +6,11 @@ import lombok.Data; import lombok.NoArgsConstructor; import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; @Data @Builder @@ -19,6 +24,10 @@ public class AnalyzeRequest { private boolean reanalyseOnlyIfPossible; private ManualRedactions manualRedactions; private OffsetDateTime lastProcessed; + private Set excludedPages; + + @Builder.Default + private List fileAttributes = new ArrayList<>(); } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java index 1ce9d759..5eadb70f 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java @@ -25,6 +25,8 @@ public class AnalyzeResult { private long 
rulesVersion; private long legalBasisVersion; + private boolean wasReanalyzed; + } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/FileAttribute.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/FileAttribute.java new file mode 100644 index 00000000..aea494c9 --- /dev/null +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/FileAttribute.java @@ -0,0 +1,19 @@ +package com.iqser.red.service.redaction.v1.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class FileAttribute { + + private String id; + private String label; + private String placeholder; + private String value; + +} diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualImageRecategorization.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualImageRecategorization.java new file mode 100644 index 00000000..7dc9120c --- /dev/null +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualImageRecategorization.java @@ -0,0 +1,21 @@ +package com.iqser.red.service.redaction.v1.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class ManualImageRecategorization { + + private String id; + private String user; + private Status status; + private String type; + private String legalBasis; + private boolean redacted; + +} \ No newline at end of file diff --git 
a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualLegalBasisChange.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualLegalBasisChange.java new file mode 100644 index 00000000..4f0d211f --- /dev/null +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualLegalBasisChange.java @@ -0,0 +1,19 @@ +package com.iqser.red.service.redaction.v1.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class ManualLegalBasisChange { + + private String id; + private String user; + private Status status; + private String legalBasis; + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionType.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionType.java index 83df7d67..0bb0c607 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionType.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionType.java @@ -1,5 +1,5 @@ package com.iqser.red.service.redaction.v1.model; public enum ManualRedactionType { - ADD, REMOVE, FORCE_REDACT + ADD, REMOVE, FORCE_REDACT, RECATEGORIZE, LEGAL_BASIS_CHANGE } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java index af866d09..baffede0 100644 --- 
a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java @@ -26,6 +26,12 @@ public class ManualRedactions { @Builder.Default private Set entriesToAdd = new HashSet<>(); + @Builder.Default + private Set imageRecategorizations = new HashSet<>(); + + @Builder.Default + private Set manualLegalBasisChanges = new HashSet<>(); + @Builder.Default private Map> comments = new HashMap<>(); diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionChangeLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionChangeLogEntry.java index 3dfbacce..a53d3b0e 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionChangeLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionChangeLogEntry.java @@ -44,4 +44,6 @@ public class RedactionChangeLogEntry { private boolean isDossierDictionaryEntry; + private boolean excluded; + } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java index d8074d4f..e347f0b3 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java @@ -49,4 +49,9 @@ public class RedactionLogEntry { private boolean isDossierDictionaryEntry; + private boolean excluded; + + private String 
recategorizationType; + private String legalBasisChangeValue; + } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java index cdf45fcb..0a6037a6 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java @@ -8,8 +8,6 @@ import org.springframework.web.bind.annotation.RequestBody; public interface RedactionResource { - String SERVICE_NAME = "redaction-service-v1"; - String RULE_SET_PARAMETER_NAME = "dossierTemplateId"; String RULE_SET_PATH_VARIABLE = "/{" + RULE_SET_PARAMETER_NAME + "}"; @@ -32,4 +30,7 @@ public interface RedactionResource { @PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE) void testRules(@RequestBody String rules); + @PostMapping(value = "/redaction-log/preview", consumes = MediaType.APPLICATION_JSON_VALUE) + RedactionLog getRedactionLogPreview(@RequestBody RedactionRequest redactionRequest); + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index a2e285e4..0149d6a0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.controller; import 
com.iqser.red.service.file.management.v1.api.model.FileType; import com.iqser.red.service.redaction.v1.model.AnnotateRequest; import com.iqser.red.service.redaction.v1.model.AnnotateResponse; +import com.iqser.red.service.redaction.v1.model.RedactionLog; import com.iqser.red.service.redaction.v1.model.RedactionRequest; import com.iqser.red.service.redaction.v1.model.RedactionResult; import com.iqser.red.service.redaction.v1.resources.RedactionResource; @@ -12,6 +13,7 @@ import com.iqser.red.service.redaction.v1.server.exception.RedactionException; import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService; import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService; +import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; @@ -39,7 +41,7 @@ public class RedactionController implements RedactionResource { private final AnnotationService annotationService; private final PdfSegmentationService pdfSegmentationService; private final RedactionStorageService redactionStorageService; - + private final RedactionLogCreatorService redactionLogCreatorService; public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) { @@ -155,6 +157,14 @@ public class RedactionController implements RedactionResource { droolsExecutionService.testRules(rules); } + @Override + public RedactionLog getRedactionLogPreview(RedactionRequest redactionRequest) { + + var redactionLog = redactionStorageService.getRedactionLog(redactionRequest.getDossierId(), redactionRequest.getFileId()); + + return 
redactionLogCreatorService.getRedactionLogPreview(redactionLog, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions()); + } + private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java index c9fdc711..2ae553db 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java @@ -9,7 +9,7 @@ import java.util.List; @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true) -public class Entity { +public class Entity implements ReasonHolder { private final String word; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java index 766d607d..5aab9c7a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java @@ -9,7 +9,7 @@ import lombok.NoArgsConstructor; @Builder @NoArgsConstructor @AllArgsConstructor -public class Image { +public class Image implements ReasonHolder { private String type; private RedRectangle2D position; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReasonHolder.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReasonHolder.java new file mode 100644 index 00000000..51d9b0fd --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReasonHolder.java @@ -0,0 +1,14 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +/** Accessors for a redaction reason and a redaction flag; in this change set Entity and Image are declared to implement it. */ public interface ReasonHolder { + + /** Returns the redaction reason currently held. */ String getRedactionReason(); + + /** Replaces the held redaction reason. */ void setRedactionReason(String reason); + + /** Returns the redaction flag. */ boolean isRedaction(); + + /** Sets the redaction flag. */ void setRedaction(boolean value); + + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index fa44f983..dc878a20 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import com.iqser.red.service.redaction.v1.model.FileAttribute; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; @@ -8,9 +9,11 @@ import lombok.Data; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; @@ -52,6 +55,22 @@ public class Section { @Builder.Default private Set 
images = new HashSet<>(); + @Builder.Default + private List fileAttributes = new ArrayList<>(); + + + /** Returns true when at least one file attribute has the given id and the given value; false when the attribute list is null. */ public boolean fileAttributeByIdEquals(String id, String value){ + return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue())); + } + + /** Returns true when at least one file attribute has the given placeholder and the given value; false when the attribute list is null. */ public boolean fileAttributeByPlaceholderEquals(String placeholder, String value){ + return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue())); + } + + /** Returns true when at least one file attribute has the given label and the given value; false when the attribute list is null. */ public boolean fileAttributeByLabelEquals(String label, String value){ + return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue())); + } + public boolean rowEquals(String headerName, String value) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java index bae6d1d0..ff772521 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java @@ -4,31 +4,42 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeResult; import com.iqser.red.service.redaction.v1.model.RedactionChangeLog; import com.iqser.red.service.redaction.v1.model.RedactionLog; import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; + import org.springframework.stereotype.Service; @Service public class AnalyzeResponseService { - public 
AnalyzeResult createAnalyzeResponse(String dossierId, String fileId, long duration, int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) { - boolean hasHints = redactionLog.getRedactionLogEntry().stream().anyMatch(RedactionLogEntry::isHint); + /** Builds the AnalyzeResult for a file; entries marked excluded are ignored when deriving hasHints, hasRequests, hasRedactions and hasImages, while hasUpdates is derived from the change log. */ public AnalyzeResult createAnalyzeResponse(String dossierId, String fileId, long duration, int pageCount, + RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) { + + boolean hasHints = redactionLog.getRedactionLogEntry() + .stream() + .filter(entry -> !entry.isExcluded()) + .anyMatch(entry -> entry.isHint() && !"false_positive".equals(entry.getType())); /* constant-first equals: an entry without a type must not throw */ boolean hasRequests = redactionLog.getRedactionLogEntry() .stream() + .filter(entry -> !entry.isExcluded()) .anyMatch(entry -> entry.isManual() && entry.getStatus() .equals(com.iqser.red.service.redaction.v1.model.Status.REQUESTED)); boolean hasRedactions = redactionLog.getRedactionLogEntry() .stream() + .filter(entry -> !entry.isExcluded()) .anyMatch(entry -> entry.isRedacted() && !entry.isManual() || entry.isManual() && entry.getStatus() .equals(com.iqser.red.service.redaction.v1.model.Status.APPROVED)); boolean hasImages = redactionLog.getRedactionLogEntry() .stream() - .anyMatch(entry -> entry.isHint() && entry.getType().equals("image")); + .filter(entry -> !entry.isExcluded()) + .anyMatch(entry -> entry.isHint() && "image".equals(entry.getType()) || entry.isImage()); boolean hasUpdates = redactionChangeLog != null && redactionChangeLog.getRedactionLogEntry() != null && !redactionChangeLog .getRedactionLogEntry() - .isEmpty() && redactionChangeLog.getRedactionLogEntry().stream().anyMatch(entry -> !entry.getType().equals("false_positive")); + .isEmpty() && redactionChangeLog.getRedactionLogEntry() + .stream() + .anyMatch(entry -> !"false_positive".equals(entry.getType())); return AnalyzeResult.builder() .dossierId(dossierId) @@ -46,4 +57,5 @@ public class AnalyzeResponseService { 
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion()) .build(); } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 02da3aa7..e6d00c15 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import com.iqser.red.service.redaction.v1.model.FileAttribute; import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; import com.iqser.red.service.redaction.v1.model.ManualRedactions; import com.iqser.red.service.redaction.v1.model.Point; @@ -33,7 +34,7 @@ public class EntityRedactionService { private final SurroundingWordsService surroundingWordsService; - public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions, String dossierId) { + public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions, String dossierId, List fileAttributes) { dictionaryService.updateDictionary(dossierTemplateId, dossierId); KieContainer container = droolsExecutionService.updateRules(dossierTemplateId); @@ -41,7 +42,7 @@ public class EntityRedactionService { Dictionary dictionary = dictionaryService.getDeepCopyDictionary(dossierTemplateId, dossierId); - Set documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null)); + Set documentEntities = new HashSet<>(findEntities(classifiedDoc, container, 
manualRedactions, dictionary, false, null, fileAttributes)); if (dictionary.hasLocalEntries()) { @@ -53,7 +54,7 @@ public class EntityRedactionService { } }); - Set foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber); + Set foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber, fileAttributes); EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary); EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities); } @@ -84,7 +85,7 @@ public class EntityRedactionService { private Set findEntities(Document classifiedDoc, KieContainer kieContainer, ManualRedactions manualRedactions, Dictionary dictionary, boolean local, - Map> hintsPerSectionNumber) { + Map> hintsPerSectionNumber, List fileAttributes) { Set documentEntities = new HashSet<>(); @@ -95,31 +96,31 @@ public class EntityRedactionService { List tables = paragraph.getTables(); for (Table table : tables) { if (table.getColCount() == 2) { - sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber)); + sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes)); } else { - sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber)); + sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes)); } sectionNumber.incrementAndGet(); } sectionSearchableTextPairs.add(processText(classifiedDoc, paragraph.getSearchableText(), paragraph.getTextBlocks(), paragraph .getHeadline(), manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, paragraph - 
.getImages())); + .getImages(), fileAttributes)); sectionNumber.incrementAndGet(); } for (Header header : classifiedDoc.getHeaders()) { - sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>())); + sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes)); sectionNumber.incrementAndGet(); } for (Footer footer : classifiedDoc.getFooters()) { - sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>())); + sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes)); sectionNumber.incrementAndGet(); } for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) { sectionSearchableTextPairs.add(processText(classifiedDoc, unclassifiedText.getSearchableText(), unclassifiedText - .getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>())); + .getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes)); sectionNumber.incrementAndGet(); } @@ -164,7 +165,7 @@ public class EntityRedactionService { ManualRedactions manualRedactions, AtomicInteger sectionNumber, Dictionary dictionary, boolean local, - Map> hintsPerSectionNumber) { + Map> hintsPerSectionNumber, List fileAttributes) { List sectionSearchableTextPairs = new ArrayList<>(); @@ -229,6 +230,7 @@ public class EntityRedactionService 
{ .tabularData(tabularData) .searchableText(searchableRow) .dictionary(dictionary) + .fileAttributes(fileAttributes) .build(), searchableRow)); if (!local) { @@ -252,7 +254,8 @@ public class EntityRedactionService { ManualRedactions manualRedactions, AtomicInteger sectionNumber, Dictionary dictionary, boolean local, - Map> hintsPerSectionNumber) { + Map> hintsPerSectionNumber, + List fileAttributes) { List sectionSearchableTextPairs = new ArrayList<>(); SearchableText entireTableText = new SearchableText(); @@ -296,6 +299,7 @@ public class EntityRedactionService { .sectionNumber(sectionNumber.intValue()) .searchableText(entireTableText) .dictionary(dictionary) + .fileAttributes(fileAttributes) .build(), entireTableText)); if (!local) { @@ -315,7 +319,7 @@ public class EntityRedactionService { ManualRedactions manualRedactions, AtomicInteger sectionNumber, Dictionary dictionary, boolean local, Map> hintsPerSectionNumber, - List images) { + List images, List fileAttributes) { if (!local) { SectionText sectionText = new SectionText(); @@ -355,6 +359,7 @@ public class EntityRedactionService { .images(images.stream() .map(image -> convert(image, sectionNumber.intValue(), headline)) .collect(Collectors.toSet())) + .fileAttributes(fileAttributes) .build(), searchableText); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java index 3872f49f..ddc67900 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java @@ -1,14 +1,46 @@ package 
com.iqser.red.service.redaction.v1.server.redaction.service; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.kie.api.runtime.KieContainer; +import org.springframework.stereotype.Service; +import org.springframework.web.bind.annotation.RequestBody; + import com.iqser.red.service.file.management.v1.api.model.FileType; -import com.iqser.red.service.redaction.v1.model.*; +import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; +import com.iqser.red.service.redaction.v1.model.AnalyzeResult; +import com.iqser.red.service.redaction.v1.model.Comment; +import com.iqser.red.service.redaction.v1.model.IdRemoval; +import com.iqser.red.service.redaction.v1.model.ManualForceRedact; +import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization; +import com.iqser.red.service.redaction.v1.model.ManualLegalBasisChange; +import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; +import com.iqser.red.service.redaction.v1.model.ManualRedactions; +import com.iqser.red.service.redaction.v1.model.Rectangle; +import com.iqser.red.service.redaction.v1.model.RedactionLog; +import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.model.SectionArea; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.classification.model.Text; import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; import com.iqser.red.service.redaction.v1.server.exception.RedactionException; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; -import com.iqser.red.service.redaction.v1.server.redaction.model.*; +import 
com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion; +import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.Image; +import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D; +import com.iqser.red.service.redaction.v1.server.redaction.model.Section; +import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; @@ -17,14 +49,6 @@ import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; -import org.kie.api.runtime.KieContainer; -import org.springframework.stereotype.Service; -import org.springframework.web.bind.annotation.RequestBody; - -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; - @Slf4j @Service @RequiredArgsConstructor @@ -41,6 +65,7 @@ public class ReanalyzeService { private final AnalyzeResponseService analyzeResponseService; private final LegalBasisClient legalBasisClient; + public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) { long startTime = System.currentTimeMillis(); @@ -59,18 +84,18 @@ public class ReanalyzeService { log.info("Document structure analysis successful, starting redaction analysis..."); entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getDossierTemplateId(), analyzeRequest.getManualRedactions(), analyzeRequest - .getDossierId()); + .getDossierId(), analyzeRequest.getFileAttributes()); redactionLogCreatorService.createRedactionLog(classifiedDoc, 
pageCount, analyzeRequest.getManualRedactions(), analyzeRequest .getDossierTemplateId()); log.info("Redaction analysis successful..."); var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); - var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(),legalBasis, - classifiedDoc.getDictionaryVersion().getDossierTemplateVersion(), - classifiedDoc.getDictionaryVersion().getDossierVersion(), - classifiedDoc.getRulesVersion(), - legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); + var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), legalBasis, classifiedDoc.getDictionaryVersion() + .getDossierTemplateVersion(), classifiedDoc.getDictionaryVersion() + .getDossierVersion(), classifiedDoc.getRulesVersion(), legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); + + excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages()); log.info("Analyzed with rules {} and dictionary {} for dossierTemplate: {}", classifiedDoc.getRulesVersion(), classifiedDoc .getDictionaryVersion(), analyzeRequest.getDossierTemplateId()); @@ -165,14 +190,15 @@ public class ReanalyzeService { KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId()); - Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest + .getDossierId()); List sectionSearchableTextPairs = new ArrayList<>(); for (SectionText reanalysisSection : reanalysisSections) { Set entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection .getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false); - if (reanalysisSection.getCellStarts() != null) { + if (reanalysisSection.getCellStarts() != null && 
!reanalysisSection.getCellStarts().isEmpty()) { surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection .getCellStarts()); } else { @@ -191,6 +217,7 @@ public class ReanalyzeService { .searchableText(reanalysisSection.getSearchableText()) .dictionary(dictionary) .images(reanalysisSection.getImages()) + .fileAttributes(analyzeRequest.getFileAttributes()) .build(), reanalysisSection.getSearchableText())); } @@ -240,11 +267,11 @@ public class ReanalyzeService { .getDossierTemplateId())); } - redactionLog.getRedactionLogEntry() - .removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber())); + redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber())); redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries); - return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement); - + AnalyzeResult analyzeResult = finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement); + analyzeResult.setWasReanalyzed(true); + return analyzeResult; } @@ -255,6 +282,8 @@ public class ReanalyzeService { redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierTemplateVersion()); redactionLog.setDossierDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierVersion()); + excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages()); + var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog); @@ -271,9 +300,9 @@ public class ReanalyzeService { return new HashSet<>(); } - return Stream.concat(manualRedactions.getIdsToRemove() - .stream() - .map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId)) + 
return Stream.concat(manualRedactions.getManualLegalBasisChanges().stream().map(ManualLegalBasisChange::getId), + Stream.concat(manualRedactions.getImageRecategorizations().stream().map(ManualImageRecategorization::getId), + Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId)))) .collect(Collectors.toSet()); } @@ -292,4 +321,18 @@ public class ReanalyzeService { .build(); } + + private void excludeExcludedPages(RedactionLog redactionLog, Set excludedPages) { + + redactionLog.getRedactionLogEntry().forEach(entry -> { + entry.getPositions().forEach(pos -> { + if (excludedPages != null && excludedPages.contains(pos.getPage())) { + entry.setExcluded(true); + } else { + entry.setExcluded(false); + } + }); + }); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionChangeLogService.java index 53fc805e..1cc743e5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionChangeLogService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionChangeLogService.java @@ -90,6 +90,7 @@ public class RedactionChangeLogService { .comments(entry.getComments()) .changeType(changeType) .isDossierDictionaryEntry(entry.isDossierDictionaryEntry()) + .excluded(entry.isExcluded()) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 9aef4dc7..5c5e1e70 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -9,19 +9,17 @@ import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSeque import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.Image; +import com.iqser.red.service.redaction.v1.server.redaction.model.ReasonHolder; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import lombok.RequiredArgsConstructor; import org.apache.commons.collections4.CollectionUtils; +import org.springframework.beans.BeanUtils; import org.springframework.stereotype.Service; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; @Service @@ -69,7 +67,7 @@ public class RedactionLogCreatorService { RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder() .id(id) - .color(getColorForImage(image, dossierTemplateId, false)) + .color(getColorForImage(image.getType(), dossierTemplateId, false, image.isRedaction())) .isImage(true) .type(image.getType()) .redacted(image.isRedaction()) @@ -87,59 +85,7 
@@ public class RedactionLogCreatorService { .section(image.getSection()) .build(); - if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) { - for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) { - if (manualRemoval.getId().equals(id)) { - String manualOverrideReason = null; - if (manualRemoval.getStatus().equals(Status.APPROVED)) { - image.setRedaction(false); - redactionLogEntry.setRedacted(false); - redactionLogEntry.setStatus(Status.APPROVED); - manualOverrideReason = image.getRedactionReason() + ", removed by manual override"; - redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, false)); - } else if (manualRemoval.getStatus().equals(Status.REQUESTED)) { - manualOverrideReason = image.getRedactionReason() + ", requested to remove"; - redactionLogEntry.setStatus(Status.REQUESTED); - redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, true)); - } else { - redactionLogEntry.setStatus(Status.DECLINED); - } - - image.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : image.getRedactionReason()); - redactionLogEntry.setReason(manualOverrideReason); - redactionLogEntry.setManual(true); - redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE); - } - } - } - - if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) { - for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) { - if (manualForceRedact.getId().equals(id)) { - String manualOverrideReason = null; - if (manualForceRedact.getStatus().equals(Status.APPROVED)) { - image.setRedaction(true); - redactionLogEntry.setRedacted(true); - redactionLogEntry.setStatus(Status.APPROVED); - redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, false)); - manualOverrideReason = image.getRedactionReason() + ", forced by manual override"; - redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); - } else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) { - manualOverrideReason = image.getRedactionReason() + ", requested to force redact"; - redactionLogEntry.setStatus(Status.REQUESTED); - redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, true)); - redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); - } else { - redactionLogEntry.setStatus(Status.DECLINED); - } - - image.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : image.getRedactionReason()); - redactionLogEntry.setReason(manualOverrideReason); - redactionLogEntry.setManual(true); - redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT); - } - } - } + processImageEntry(manualRedactions, dossierTemplateId, image, redactionLogEntry); redactionLogEntities.add(redactionLogEntry); } @@ -147,6 +93,118 @@ public class RedactionLogCreatorService { return redactionLogEntities; } + private void processImageEntry(ManualRedactions manualRedactions, String dossierTemplateId, ReasonHolder image, RedactionLogEntry redactionLogEntry) { + if (manualRedactions != null && !manualRedactions.getImageRecategorizations().isEmpty()) { + for (ManualImageRecategorization recategorization : manualRedactions.getImageRecategorizations()) { + if (recategorization.getId().equals(redactionLogEntry.getId())) { + String manualOverrideReason = null; + if (recategorization.getStatus().equals(Status.APPROVED)) { + image.setRedaction(recategorization.isRedacted()); + redactionLogEntry.setType(recategorization.getType()); + redactionLogEntry.setHint(dictionaryService.isHint(recategorization.getType(), dossierTemplateId)); + redactionLogEntry.setRedacted(recategorization.isRedacted()); + redactionLogEntry.setStatus(Status.APPROVED); + redactionLogEntry.setLegalBasis(recategorization.getLegalBasis()); + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", recategorized by manual override"); + redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted())); + } else if (recategorization.getStatus().equals(Status.REQUESTED)) { + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", requested to recategorize"); + redactionLogEntry.setStatus(Status.REQUESTED); + redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted())); + 
redactionLogEntry.setRecategorizationType(recategorization.getType()); + } else { + redactionLogEntry.setStatus(Status.DECLINED); + } + + image.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason()); + redactionLogEntry.setReason(manualOverrideReason); + redactionLogEntry.setManual(true); + redactionLogEntry.setManualRedactionType(ManualRedactionType.RECATEGORIZE); + } + } + } + + if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) { + for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) { + if (manualRemoval.getId().equals(redactionLogEntry.getId())) { + String manualOverrideReason = null; + if (manualRemoval.getStatus().equals(Status.APPROVED)) { + image.setRedaction(false); + redactionLogEntry.setRedacted(false); + redactionLogEntry.setStatus(Status.APPROVED); + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", removed by manual override"); + redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted())); + } else if (manualRemoval.getStatus().equals(Status.REQUESTED)) { + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", requested to remove"); + redactionLogEntry.setStatus(Status.REQUESTED); + redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted())); + } else { + redactionLogEntry.setStatus(Status.DECLINED); + } + + image.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : image.getRedactionReason()); + redactionLogEntry.setReason(manualOverrideReason); + redactionLogEntry.setManual(true); + redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE); + } + } + } + + if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) { + for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) { + if (manualForceRedact.getId().equals(redactionLogEntry.getId())) { + String manualOverrideReason = null; + if (manualForceRedact.getStatus().equals(Status.APPROVED)) { + image.setRedaction(true); + redactionLogEntry.setRedacted(true); + redactionLogEntry.setStatus(Status.APPROVED); + redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted())); + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", forced by manual override"); + redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); + } else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) { + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", requested to force redact"); + redactionLogEntry.setStatus(Status.REQUESTED); + redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted())); + redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); + } else { + redactionLogEntry.setStatus(Status.DECLINED); + } + + image.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : image.getRedactionReason()); + redactionLogEntry.setReason(manualOverrideReason); + redactionLogEntry.setManual(true); + redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT); + } + } + } + + + if (manualRedactions != null && !manualRedactions.getManualLegalBasisChanges().isEmpty()) { + for (ManualLegalBasisChange manualLegalBasisChange : manualRedactions.getManualLegalBasisChanges()) { + if (manualLegalBasisChange.getId().equals(redactionLogEntry.getId())) { + String manualOverrideReason = null; + if (manualLegalBasisChange.getStatus().equals(Status.APPROVED)) { + redactionLogEntry.setStatus(Status.APPROVED); + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", legal basis was manually changed"); + redactionLogEntry.setLegalBasis(manualLegalBasisChange.getLegalBasis()); + } else if (manualLegalBasisChange.getStatus().equals(Status.REQUESTED)) { + manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", legal basis change requested"); + redactionLogEntry.setStatus(Status.REQUESTED); + redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted())); + redactionLogEntry.setLegalBasisChangeValue(manualLegalBasisChange.getLegalBasis()); + } else { + redactionLogEntry.setStatus(Status.DECLINED); + } + + image.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : image.getRedactionReason()); + redactionLogEntry.setReason(manualOverrideReason); + redactionLogEntry.setManual(true); + redactionLogEntry.setManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE); + } + } + } + } + private Set getManualRedactionPages(ManualRedactions manualRedactions) { @@ -176,7 +234,6 @@ public class RedactionLogCreatorService { entityLoop: for (Entity entity : entities.get(page)) { - List comments = null; for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) { @@ -189,60 +246,8 @@ public class RedactionLogCreatorService { processedIds.add(entityPositionSequence.getId()); } - if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) { - for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) { - if (manualRemoval.getId().equals(entityPositionSequence.getId())) { - comments = manualRedactions.getComments().get(manualRemoval.getId()); - String manualOverrideReason = null; - if (manualRemoval.getStatus().equals(Status.APPROVED)) { - entity.setRedaction(false); - redactionLogEntry.setRedacted(false); - redactionLogEntry.setStatus(Status.APPROVED); - manualOverrideReason = entity.getRedactionReason() + ", removed by manual override"; - redactionLogEntry.setColor(getColor(entity, dossierTemplateId, false)); - } else if (manualRemoval.getStatus().equals(Status.REQUESTED)) { - manualOverrideReason = entity.getRedactionReason() + ", requested to remove"; - redactionLogEntry.setStatus(Status.REQUESTED); - redactionLogEntry.setColor(getColor(entity, dossierTemplateId, true)); - } else { - redactionLogEntry.setStatus(Status.DECLINED); - } - - entity.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : entity.getRedactionReason()); - redactionLogEntry.setReason(manualOverrideReason); - redactionLogEntry.setManual(true); - redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE); - } - } - } - - if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) { - for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) { - if (manualForceRedact.getId().equals(entityPositionSequence.getId())) { - String manualOverrideReason = null; - if (manualForceRedact.getStatus().equals(Status.APPROVED)) { - entity.setRedaction(true); - redactionLogEntry.setRedacted(true); - redactionLogEntry.setStatus(Status.APPROVED); - redactionLogEntry.setColor(getColor(entity, dossierTemplateId, false)); - manualOverrideReason = entity.getRedactionReason() + ", forced by manual override"; - redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); - } else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) { - manualOverrideReason = entity.getRedactionReason() + ", requested to force redact"; - redactionLogEntry.setStatus(Status.REQUESTED); - redactionLogEntry.setColor(getColor(entity, dossierTemplateId, true)); - redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); - } else { - redactionLogEntry.setStatus(Status.DECLINED); - } - - entity.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : entity.getRedactionReason()); - redactionLogEntry.setReason(manualOverrideReason); - redactionLogEntry.setManual(true); - redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT); - } - } - } + redactionLogEntry.setId(entityPositionSequence.getId()); + processRedactionLogEntry(manualRedactions, dossierTemplateId, redactionLogEntry, entity); if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) { List rectanglesPerLine = getRectanglesPerLine(entityPositionSequence.getSequences() @@ -250,16 +255,11 @@ public class RedactionLogCreatorService { .flatMap(seq -> seq.getTextPositions().stream()) .collect(Collectors.toList()), page); - if (manualRedactions != null) { - comments = manualRedactions.getComments().get(entityPositionSequence.getId()); - } - redactionLogEntry.setComments(comments); redactionLogEntry.getPositions().addAll(rectanglesPerLine); } - redactionLogEntry.setId(entityPositionSequence.getId()); // FIXME ids should never be null. Figure out why this happens. 
if (redactionLogEntry.getId() != null) { @@ -271,25 +271,130 @@ public class RedactionLogCreatorService { return redactionLogEntities; } + private void processRedactionLogEntry(ManualRedactions manualRedactions, String dossierTemplateId, RedactionLogEntry redactionLogEntry, ReasonHolder reasonHolder) { + + List comments = null; + + if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) { + for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) { + if (manualRemoval.getId().equals(redactionLogEntry.getId())) { + comments = manualRedactions.getComments().get(manualRemoval.getId()); + String manualOverrideReason = null; + if (manualRemoval.getStatus().equals(Status.APPROVED)) { + reasonHolder.setRedaction(false); + redactionLogEntry.setRedacted(false); + redactionLogEntry.setStatus(Status.APPROVED); + manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", removed by manual override"); + redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted())); + } else if (manualRemoval.getStatus().equals(Status.REQUESTED)) { + manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", requested to remove"); + redactionLogEntry.setStatus(Status.REQUESTED); + redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted())); + } else { + redactionLogEntry.setStatus(Status.DECLINED); + } + + reasonHolder.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : reasonHolder.getRedactionReason()); + redactionLogEntry.setReason(manualOverrideReason); + redactionLogEntry.setManual(true); + redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE); + } + } + } + + if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) { + for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) { + if (manualForceRedact.getId().equals(redactionLogEntry.getId())) { + String manualOverrideReason = null; + if (manualForceRedact.getStatus().equals(Status.APPROVED)) { + reasonHolder.setRedaction(true); + redactionLogEntry.setRedacted(true); + redactionLogEntry.setStatus(Status.APPROVED); + redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted())); + manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", forced by manual override"); + redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); + } else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) { + manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", requested to force redact"); + redactionLogEntry.setStatus(Status.REQUESTED); + redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted())); + redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis()); + } else { + redactionLogEntry.setStatus(Status.DECLINED); + } + + reasonHolder.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : reasonHolder.getRedactionReason()); + redactionLogEntry.setReason(manualOverrideReason); + redactionLogEntry.setManual(true); + redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT); + } + } + } + + if (manualRedactions != null && !manualRedactions.getManualLegalBasisChanges().isEmpty()) { + for (ManualLegalBasisChange manualLegalBasisChange : manualRedactions.getManualLegalBasisChanges()) { + if (manualLegalBasisChange.getId().equals(redactionLogEntry.getId())) { + String manualOverrideReason = null; + if (manualLegalBasisChange.getStatus().equals(Status.APPROVED)) { + redactionLogEntry.setStatus(Status.APPROVED); + manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", legal basis was manually changed"); + redactionLogEntry.setLegalBasis(manualLegalBasisChange.getLegalBasis()); + } else if (manualLegalBasisChange.getStatus().equals(Status.REQUESTED)) { + manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", legal basis change requested"); + redactionLogEntry.setStatus(Status.REQUESTED); + redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted())); + redactionLogEntry.setLegalBasisChangeValue(manualLegalBasisChange.getLegalBasis()); + } else { + redactionLogEntry.setStatus(Status.DECLINED); + } + + reasonHolder.setRedactionReason(manualOverrideReason != null ? 
manualOverrideReason : reasonHolder.getRedactionReason()); + redactionLogEntry.setReason(manualOverrideReason); + redactionLogEntry.setManual(true); + redactionLogEntry.setManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE); + } + } + } + + + if (manualRedactions != null) { + comments = manualRedactions.getComments().get(redactionLogEntry.getId()); + } + + redactionLogEntry.setComments(comments); + } + + private String mergeReasonIfNecessary(String currentReason, String addition) { + if (currentReason != null) { + if (!currentReason.contains(addition)) { + return currentReason + addition; + } + return currentReason; + } else { + return ""; + } + } + private List getRectanglesPerLine(List textPositions, int page) { List rectangles = new ArrayList<>(); if (textPositions.size() == 1) { - rectangles.add( TextPositionSequence.fromData(textPositions, page).getRectangle()); + rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle()); } else { float y = textPositions.get(0).getYDirAdj(); int startIndex = 0; for (int i = 1; i < textPositions.size(); i++) { float yDirAdj = textPositions.get(i).getYDirAdj(); if (yDirAdj != y) { - rectangles.add( TextPositionSequence.fromData(textPositions.subList(startIndex, i), page).getRectangle()); + rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page) + .getRectangle()); y = yDirAdj; startIndex = i; } } if (startIndex != textPositions.size()) { - rectangles.add( TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page).getRectangle()); + rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page) + .getRectangle()); } } @@ -363,14 +468,14 @@ public class RedactionLogCreatorService { private RedactionLogEntry createRedactionLogEntry(Entity entity, String dossierTemplateId) { return RedactionLogEntry.builder() - .color(getColor(entity, dossierTemplateId, false)) + 
.color(getColor(entity.getType(), dossierTemplateId, false, entity.isRedaction())) .reason(entity.getRedactionReason()) .legalBasis(entity.getLegalBasis()) .value(entity.getWord()) .type(entity.getType()) .redacted(entity.isRedaction()) - .isHint(isHint(entity, dossierTemplateId)) - .isRecommendation(isRecommendation(entity, dossierTemplateId)) + .isHint(isHint(entity.getType(), dossierTemplateId)) + .isRecommendation(isRecommendation(entity.getType(), dossierTemplateId)) .section(entity.getHeadline()) .sectionNumber(entity.getSectionNumber()) .matchedRule(entity.getMatchedRule()) @@ -384,15 +489,15 @@ public class RedactionLogCreatorService { } - private float[] getColor(Entity entity, String dossierTemplateId, boolean requestedToRemove) { + private float[] getColor(String type, String dossierTemplateId, boolean requestedToRemove, boolean isRedaction) { if (requestedToRemove) { return dictionaryService.getRequestRemoveColor(dossierTemplateId); } - if (!entity.isRedaction() && !isHint(entity, dossierTemplateId)) { + if (!isRedaction && !isHint(type, dossierTemplateId)) { return dictionaryService.getNotRedactedColor(dossierTemplateId); } - return dictionaryService.getColor(entity.getType(), dossierTemplateId); + return dictionaryService.getColor(type, dossierTemplateId); } @@ -413,27 +518,27 @@ public class RedactionLogCreatorService { } - private float[] getColorForImage(Image image, String dossierTemplateId, boolean requestedToRemove) { + private float[] getColorForImage(String type, String dossierTemplateId, boolean requestedToRemove, boolean isRedaction) { if (requestedToRemove) { return dictionaryService.getRequestRemoveColor(dossierTemplateId); } - if (!image.isRedaction() && !dictionaryService.isHint(image.getType(), dossierTemplateId)) { + if (!isRedaction && !dictionaryService.isHint(type, dossierTemplateId)) { return dictionaryService.getNotRedactedColor(dossierTemplateId); } - return dictionaryService.getColor(image.getType(), dossierTemplateId); + return 
dictionaryService.getColor(type, dossierTemplateId); } - private boolean isHint(Entity entity, String dossierTemplateId) { + private boolean isHint(String type, String dossierTemplateId) { - return dictionaryService.isHint(entity.getType(), dossierTemplateId); + return dictionaryService.isHint(type, dossierTemplateId); } - private boolean isRecommendation(Entity entity, String dossierTemplateId) { + private boolean isRecommendation(String type, String dossierTemplateId) { - return dictionaryService.isRecommendation(entity.getType(), dossierTemplateId); + return dictionaryService.isRecommendation(type, dossierTemplateId); } @@ -480,4 +585,74 @@ public class RedactionLogCreatorService { } } + public RedactionLog getRedactionLogPreview(RedactionLog redactionLog, String dossierTemplateId, ManualRedactions manualRedactions) { + + + var manualRedactionPages = getManualRedactionPages(manualRedactions); + + // generate all manual entries + var manualRedactionLogEntries = new HashMap(); + for (var page : manualRedactionPages) { + + var pageEntries = addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, dossierTemplateId); + + for (var entry : pageEntries) { + manualRedactionLogEntries.put(entry.getId(), entry); + } + } + + for (var manualEntry : manualRedactionLogEntries.values()) { + var existingEntry = redactionLog.getRedactionLogEntry().stream().filter(e -> e.getId().equals(manualEntry.getId())).findAny(); + if (existingEntry.isPresent()) { + // if it has already been processed of sorts, update it + BeanUtils.copyProperties(manualEntry, existingEntry.get()); + } else { + // not yet in the redaction-log - add it + redactionLog.getRedactionLogEntry().add(manualEntry); + } + } + + + for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) { + + var reasonHolder = new PreviewReasonHolder(entry); + + if (entry.isImage()) { + processImageEntry(manualRedactions, dossierTemplateId, reasonHolder, entry); + } + + 
processRedactionLogEntry(manualRedactions, dossierTemplateId, entry, reasonHolder); + } + + return redactionLog; + } + + public static class PreviewReasonHolder implements ReasonHolder { + + private final RedactionLogEntry entry; + + public PreviewReasonHolder(RedactionLogEntry entry) { + this.entry = entry; + } + + @Override + public String getRedactionReason() { + return entry.getReason(); + } + + @Override + public void setRedactionReason(String reason) { + entry.setReason(reason); + } + + @Override + public boolean isRedaction() { + return entry.isRedacted(); + } + + @Override + public void setRedaction(boolean value) { + entry.setRedacted(value); + } + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java new file mode 100644 index 00000000..73a94909 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java @@ -0,0 +1,165 @@ +package com.iqser.red.service.redaction.v1.server.segmentation; + +import java.awt.Graphics; +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; +import java.util.ArrayList; +import java.util.List; + +import org.springframework.stereotype.Service; + +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class ImageMergeService { + + + public List mergeImages(List images, int rotation){ + + List mergedList = processImages(images, rotation); + + List imagesInImage = new ArrayList<>(); + for(PdfImage image: mergedList){ + for (PdfImage inner: mergedList){ + if(image != inner && 
image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){ + imagesInImage.add(inner); + } + } + } + mergedList.removeAll(imagesInImage); + + return mergedList; + } + + + //merge images, if they are separated during pdf import, return new list of Pdfimages + private List processImages(List imageList, int rotation) { + if (imageList.size() > 1) { + List mergedList = new ArrayList<>(); + int countElementsInList = 0; + boolean beginImage = true; + + // a List of Boolean, true = candidate for merging, false = no merging + List candidatesList = getCandidatesList(imageList, rotation); + + // loop through list, if there are candidates for merging (true), merge images and add it to mergedList + for (int i = 0; i < candidatesList.size(); i++) { + if (candidatesList.get(i)) { + if (beginImage) { + //begin of image, merge two parts of imageList + PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1), rotation); + // image merge successful + if (mergedImage != null) { + mergedList.add(mergedImage); + countElementsInList++; + } + } else { + //middle of an image, merge current piece auf mergedList with image of imageList + PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1), rotation); + // image merge successful + if (mergedImage != null) { + mergedList.set(countElementsInList - 1, mergedImage); + } + } + beginImage = false; + } else { + // if the last candidate is false, then both images i and i+1 must be added + if (i == candidatesList.size() - 1) { + if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) { + mergedList.add(imageList.get(i + 1)); + } else { + mergedList.add(imageList.get(i)); + mergedList.add(imageList.get(i + 1)); + } + } else { + //first image is not splitted, add i to resultlist + if (beginImage) { + mergedList.add(imageList.get(i)); + countElementsInList++; 
+ } else { + // i is the end of an image, add begin of new image + mergedList.add(imageList.get(i + 1)); + countElementsInList++; + beginImage = false; + } + } + } + } + return mergedList; + } else { + return imageList; + } + } + + private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2, int rotation) { + + // diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten + double width = image1.getPosition().getWidth(); + double width2 = image2.getPosition().getWidth(); + double height1 = image1.getPosition().getHeight(); + double height2 = image2.getPosition().getHeight(); + // mit den Werten, die unter Image gespeichert sind, funktioniert es + double img1height = image1.getImage().getHeight(); + double img1width = image1.getImage().getWidth(); + double img2height = image2.getImage().getHeight(); + + BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB); + Graphics mergedImageGraphics = mergedImage.getGraphics(); + try { + mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null); + mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null); + + // set Image, Position and type for merged Image + //set position for merged image with values of image1 and the height of both + Rectangle2D pos = new Rectangle2D.Float(); + pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), rotation == 90 ? width + width2: width, rotation == 90 ? 
height1 : height1 + height2); + PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage()); + // Graphics need to be disposed + + image1.getImage().flush(); + image2.getImage().flush(); + + mergedImage.flush(); + mergedImageGraphics.dispose(); + + return newPdfImage; + } catch (Exception e) { + // failed to merge image + log.error("Failed to merge image", e); + return null; + } + + + } + + //make a list of true and false, if the image is a candidate for merging + private List getCandidatesList(List imageList, int rotation) { + List candidatesList = new ArrayList<>(); + for (int i = 0; i < imageList.size(); i++) { + if (i >= 1) { + candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i), rotation)); + } + } + return candidatesList; + } + + // evaluate if two images are candidates for merging, depending on their coordinates, width and height + private boolean isCandidateForMerging(PdfImage image1, PdfImage image2, int rotation) { + double x1 = rotation == 90 ? image1.getPosition().getY() : image1.getPosition().getX(); + double y1 = rotation == 90 ? image1.getPosition().getX() : image1.getPosition().getY(); + double width1 = rotation == 90 ? image1.getPosition().getHeight() : image1.getPosition().getWidth(); + double x2 = rotation == 90 ? image2.getPosition().getY() : image2.getPosition().getX(); + double y2 = rotation == 90 ? image2.getPosition().getX() : image2.getPosition().getY(); + double width2 = rotation == 90 ? image2.getPosition().getHeight() : image2.getPosition().getWidth(); + double height2 = rotation == 90 ? image2.getPosition().getWidth() : image2.getPosition().getHeight(); + //if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates, + // then it is the same picture and has to be merged -> return true + return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(rotation == 90 ? 
y2 - y1 : y1 - y2) && width2 > (height2 / 6); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index a33a009d..22643b0b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -1,6 +1,19 @@ package com.iqser.red.service.redaction.v1.server.segmentation; -import com.iqser.red.service.redaction.v1.model.Rectangle; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.apache.pdfbox.io.MemoryUsageSetting; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.springframework.stereotype.Service; + import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; @@ -15,24 +28,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import 
org.apache.commons.io.IOUtils; -import org.apache.pdfbox.io.MemoryUsageSetting; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.springframework.stereotype.Service; - -import java.awt.Graphics; -import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; @Slf4j @Service @@ -47,13 +45,17 @@ public class PdfSegmentationService { private final ClassificationService classificationService; private final SectionsBuilderService sectionsBuilderService; private final ImageClassificationService imageClassificationService; + private final ImageMergeService imageMergeService; public Document parseDocument(InputStream documentInputStream) throws IOException { + return parseDocument(documentInputStream, false); } + public Document parseDocument(InputStream documentInputStream, boolean ignoreImages) throws IOException { + PDDocument pdDocument = null; try { //create tempFile @@ -64,7 +66,6 @@ public class PdfSegmentationService { Document document = new Document(); List pages = new ArrayList<>(); - pdDocument = reinitializePDDocument(tempFile, null); long pageCount = pdDocument.getNumberOfPages(); @@ -101,32 +102,19 @@ public class PdfSegmentationService { page.setRotation(rotation); page.setLandscape(isLandscape || isRotated); page.setPageNumber(pageNumber); - List mergedList = processImages(stripper.getImages()); - - List imagesInImage = new ArrayList<>(); - for(PdfImage image: mergedList){ - for (PdfImage inner: mergedList){ - if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){ - imagesInImage.add(inner); - } - } - } - mergedList.removeAll(imagesInImage); + List mergedList = 
imageMergeService.mergeImages(stripper.getImages(), rotation); page.setImages(mergedList); tableExtractionService.extractTables(cleanRulings, page); buildPageStatistics(page); increaseDocumentStatistics(page, document); - if (!ignoreImages) { imageClassificationService.classifyImages(page); } pages.add(page); - - } document.setPages(pages); @@ -149,7 +137,9 @@ public class PdfSegmentationService { } } + private PDDocument reinitializePDDocument(File tempFile, PDDocument pdDocument) throws IOException { + if (pdDocument != null) { pdDocument.close(); } @@ -164,130 +154,6 @@ public class PdfSegmentationService { return newPDDocument; } - //merge images, if they are separated during pdf import, return new list of Pdfimages - private List processImages(List imageList) { - if (imageList.size() > 1) { - List mergedList = new ArrayList<>(); - int countElementsInList = 0; - boolean beginImage = true; - - // a List of Boolean, true = candidate for merging, false = no merging - List candidatesList = getCandidatesList(imageList); - - // loop through list, if there are candidates for merging (true), merge images and add it to mergedList - for (int i = 0; i < candidatesList.size(); i++) { - if (candidatesList.get(i)) { - if (beginImage) { - //begin of image, merge two parts of imageList - PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1)); - // image merge successful - if (mergedImage != null) { - mergedList.add(mergedImage); - countElementsInList++; - } - } else { - //middle of an image, merge current piece auf mergedList with image of imageList - PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1)); - // image merge successful - if (mergedImage != null) { - mergedList.set(countElementsInList - 1, mergedImage); - } - } - beginImage = false; - } else { - // if the last candidate is false, then both images i and i+1 must be added - if (i == candidatesList.size() - 1) { - if (countElementsInList > 0 && 
mergedList.get(countElementsInList - 1) == imageList.get(i)) { - mergedList.add(imageList.get(i + 1)); - } else { - mergedList.add(imageList.get(i)); - mergedList.add(imageList.get(i + 1)); - } - } else { - //first image is not splitted, add i to resultlist - if (beginImage) { - mergedList.add(imageList.get(i)); - countElementsInList++; - } else { - // i is the end of an image, add begin of new image - mergedList.add(imageList.get(i + 1)); - countElementsInList++; - beginImage = false; - } - } - } - } - return mergedList; - } else { - return imageList; - } - } - - private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2) { - - // diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten - double width = image1.getPosition().getWidth(); - double height1 = image1.getPosition().getHeight(); - double height2 = image2.getPosition().getHeight(); - // mit den Werten, die unter Image gespeichert sind, funktioniert es - double img1height = image1.getImage().getHeight(); - double img1width = image1.getImage().getWidth(); - double img2height = image2.getImage().getHeight(); - - BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB); - Graphics mergedImageGraphics = mergedImage.getGraphics(); - try { - mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null); - mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null); - - // set Image, Position and type for merged Image - //set position for merged image with values of image1 and the height of both - Rectangle2D pos = new Rectangle2D.Float(); - pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), width, height1 + height2); - PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage()); - // Graphics need to be disposed - - image1.getImage().flush(); - image2.getImage().flush(); - - mergedImage.flush(); - mergedImageGraphics.dispose(); - - return 
newPdfImage; - } catch (Exception e) { - // failed to merge image - log.error("Failed to merge image", e); - return null; - } - - - } - - //make a list of true and false, if the image is a candidate for merging - private List getCandidatesList(List imageList) { - List candidatesList = new ArrayList<>(); - for (int i = 0; i < imageList.size(); i++) { - if (i >= 1) { - candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i))); - } - } - return candidatesList; - } - - // evaluate if two images are candidates for merging, depending on their coordinates, width and height - private boolean isCandidateForMerging(PdfImage image1, PdfImage image2) { - double x1 = image1.getPosition().getX(); - double y1 = image1.getPosition().getY(); - double width1 = image1.getPosition().getWidth(); - double x2 = image2.getPosition().getX(); - double y2 = image2.getPosition().getY(); - double width2 = image2.getPosition().getWidth(); - double height2 = image2.getPosition().getHeight(); - //if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates, - // then it is the same picture and has to be merged -> return true - return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(y1 - y2) && width2 > (height2 / 6); - } - private void increaseDocumentStatistics(Page page, Document document) { @@ -319,5 +185,4 @@ public class PdfSegmentationService { } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 0b6aac73..609a7828 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -634,6 +634,9 @@ public class RedactionIntegrationTest { long start = System.currentTimeMillis(); ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf"); AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + request.setExcludedPages(Set.of(1)); + + request.setFileAttributes(List.of(FileAttribute.builder().id("fileAttributeId").label("Vertebrate Study").placeholder("{fileattributes.vertebrateStudy}").value("true").build())); AnalyzeResult result = reanalyzeService.analyze(request); @@ -689,8 +692,25 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); start = System.currentTimeMillis(); + + ManualRedactions manualRedactions = new ManualRedactions(); + + manualRedactions.setImageRecategorizations(Set.of(ManualImageRecategorization.builder() + .id("37eee3e9d589a5cc529bfec38c3ba479") + .status(Status.APPROVED) + .type("signature") + .redacted(true) + .legalBasis("Article 39(e)(1) and Article 39(e)(2) of Regulation (EC) No 178/2002") + .build())); + + request.setManualRedactions(manualRedactions); + + AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request); + + redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + end = System.currentTimeMillis(); System.out.println("reanalysis analysis duration: " + (end - start)); @@ -758,6 +778,7 @@ public class RedactionIntegrationTest { .status(Status.APPROVED) .build())); + manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment)); manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment)); manualRedactions.getComments().put(manualAddId, 
List.of(comment)); @@ -777,14 +798,21 @@ public class RedactionIntegrationTest { request.setManualRedactions(manualRedactions); AnalyzeResult result = reanalyzeService.analyze(request); + manualRedactions.getEntriesToAdd().add(manualRedactionEntry); manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder() .id("5b940b2cb401ed9f5be6fc24f6e77bcf") .status(Status.APPROVED) .build())); + manualRedactions.setManualLegalBasisChanges(Set.of(ManualLegalBasisChange.builder() + .id("675eba69b0c2917de55462c817adaa05") + .legalBasis("Manual Legal Basis Change") + .status(Status.APPROVED) + .build())); reanalyzeService.reanalyze(request); + var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() .dossierId(TEST_DOSSIER_ID) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index 01379d9b..1ab88196 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -151,7 +151,7 @@ public class EntityRedactionServiceTest { when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, 
TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities } @@ -177,7 +177,7 @@ public class EntityRedactionServiceTest { when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities } @@ -202,7 +202,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities() .entrySet() .stream() @@ -210,7 +210,7 @@ public class EntityRedactionServiceTest { pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " + "the plant protection product.pdf"); classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, 
"dossierId", null); assertThat(classifiedDoc.getEntities() .entrySet() .stream() @@ -235,7 +235,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream() .filter(entity -> entity.getMatchedRule() == 9) @@ -302,7 +302,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream() .filter(entity -> entity.getMatchedRule() == 6) @@ -341,7 +341,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); 
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream() .filter(entity -> entity.getMatchedRule() == 11) @@ -371,7 +371,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8); assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y @@ -390,7 +390,7 @@ public class EntityRedactionServiceTest { when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3); assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9); @@ -419,7 +419,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, 
TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId"); + entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 5f7e24f2..853d7fac 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -268,7 +268,7 @@ rule "18: Redact contact information if Producer is found" rule "19: Redact AUTHOR(S)" when - Section(searchText.contains("AUTHOR(S):")) + Section(searchText.contains("AUTHOR(S):") && fileAttributeByPlaceholderEquals("{fileattributes.vertebrateStudy}", "true")) then section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 19, true, "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end