From 4c125cf0f76e81a74347bf480c8f5419e5b9eae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Thu, 10 Aug 2023 11:29:57 +0200 Subject: [PATCH] DM-357: duplicate redactionLogEntries for manualAddRedactions --- .../adapter/CustomEntityCreationAdapter.java | 149 +++++------------- .../redaction/model/EntityIdentifier.java | 68 ++++++++ .../redaction/model/RectangleWithPage.java | 32 ++++ .../redaction/service/AnalyzeService.java | 51 +++--- .../service/EntityRedactionService.java | 5 +- .../service/RedactionLogCreatorService.java | 30 +++- .../graph/ManualRedactionEntryTest.java | 139 ++++++++++++++++ .../document/graph/MigrationPocTest.java | 5 +- 8 files changed, 349 insertions(+), 130 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/RectangleWithPage.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualRedactionEntryTest.java diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java index a4e12c64..c4acca7e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java @@ -6,6 +6,7 @@ import static java.util.stream.Collectors.groupingBy; import 
java.awt.geom.Rectangle2D; import java.util.Collection; import java.util.Comparator; +import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -17,9 +18,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; @@ -28,6 +27,8 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; +import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation; import lombok.extern.slf4j.Slf4j; @@ -47,40 +48,42 @@ public class CustomEntityCreationAdapter { } - public void toRedactionEntity(RedactionLog redactionLog, SemanticNode node) { + public List toRedactionEntity(RedactionLog redactionLog, SemanticNode node) { List entityIdentifiers = redactionLog.getRedactionLogEntry().stream().map(EntityIdentifier::fromRedactionLogEntry).toList(); - 
toRedactionEntity(entityIdentifiers, node); + return toRedactionEntity(entityIdentifiers, node); } - public void createRedactionEntities(Set manualRedactionEntries, SemanticNode node) { + public List createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set manualRedactionEntries, SemanticNode node) { List entityIdentifiers = manualRedactionEntries.stream() .filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary())) .map(EntityIdentifier::fromManualRedactionEntry) .toList(); - toRedactionEntity(entityIdentifiers, node); + return toRedactionEntity(entityIdentifiers, node); } - private void toRedactionEntity(List entityIdentifiers, SemanticNode node) { + private List toRedactionEntity(List entityIdentifiers, SemanticNode node) { - Set pageNumbers = entityIdentifiers.stream().flatMap(entry -> entry.entityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet()); - Set entryValues = entityIdentifiers.stream().map(EntityIdentifier::value).map(String::toLowerCase).collect(Collectors.toSet()); + Set pageNumbers = entityIdentifiers.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet()); + Set entryValues = entityIdentifiers.stream().map(EntityIdentifier::getValue).map(String::toLowerCase).collect(Collectors.toSet()); Map> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues); - assert allValuesFound(tempEntitiesByValue, entryValues); - - entityIdentifiers.forEach(entityIdentifier -> { - findClosestEntity(entityIdentifier, tempEntitiesByValue) - .ifPresent(entities -> entities.forEach(redactionEntity -> { - createCorrectEntity(entityIdentifier, node, redactionEntity.getBoundary()); - })); - }); + List notFoundEntityIdentifiers = new LinkedList<>(); + for (EntityIdentifier entityIdentifier : entityIdentifiers) { + Optional optionalRedactionEntity = 
findClosestEntityAndReturnEmptyIfNotFound(entityIdentifier, tempEntitiesByValue); + if (optionalRedactionEntity.isEmpty()) { + notFoundEntityIdentifiers.add(entityIdentifier); + continue; + } + createCorrectEntity(entityIdentifier, node, optionalRedactionEntity.get().getBoundary()); + } tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph); + return notFoundEntityIdentifiers; } @@ -93,48 +96,54 @@ public class CustomEntityCreationAdapter { */ private void createCorrectEntity(EntityIdentifier entityIdentifier, SemanticNode node, Boundary closestBoundary) { - RedactionEntity correctEntity = entityCreationService.forceByBoundary(closestBoundary, entityIdentifier.type(), entityIdentifier.entityType, node); + RedactionEntity correctEntity = entityCreationService.forceByBoundary(closestBoundary, entityIdentifier.getType(), entityIdentifier.getEntityType(), node); - if (entityIdentifier.redacted()) { - correctEntity.force(entityIdentifier.ruleIdentifier(), entityIdentifier.reason(), entityIdentifier.legalBasis()); + if (entityIdentifier.isApplied()) { + correctEntity.force(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason(), entityIdentifier.getLegalBasis()); } else { - correctEntity.skip(entityIdentifier.ruleIdentifier(), entityIdentifier.reason()); + correctEntity.skip(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason()); } correctEntity.setDictionaryEntry(entityIdentifier.isDictionaryEntry()); correctEntity.setDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry()); } - private Optional> findClosestEntity(EntityIdentifier identifier, Map> entitiesWithSameValue) { + private Optional findClosestEntityAndReturnEmptyIfNotFound(EntityIdentifier identifier, Map> entitiesWithSameValue) { - List possibleEntities = entitiesWithSameValue.get(identifier.value().toLowerCase(Locale.ROOT)); + List possibleEntities = 
entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT)); - if (possibleEntities == null || possibleEntities.isEmpty()) { - log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.value()); + if (entityIdentifierValueNotFound(possibleEntities)) { + log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue()); return Optional.empty(); } Optional optionalClosestEntity = possibleEntities.stream() - .filter(entity -> pagesMatch(entity, identifier.entityPosition())) - .min(Comparator.comparingDouble(entity -> calculateMinDistance(identifier.entityPosition(), entity))); + .filter(entity -> pagesMatch(entity, identifier.getEntityPosition())) + .min(Comparator.comparingDouble(entity -> calculateMinDistance(identifier.getEntityPosition(), entity))); if (optionalClosestEntity.isEmpty()) { - log.warn("No Entity with value {} found on page {}", identifier.value(), identifier.entityPosition()); - return Optional.of(possibleEntities); + log.warn("No Entity with value {} found on page {}", identifier.getValue(), identifier.getEntityPosition()); + return Optional.empty(); } RedactionEntity closestEntity = optionalClosestEntity.get(); - double distance = calculateMinDistance(identifier.entityPosition(), closestEntity); + double distance = calculateMinDistance(identifier.getEntityPosition(), closestEntity); if (distance > MATCH_THRESHOLD) { log.warn(format("Distance to closest found entity is %.2f and therefore higher than the threshold of %.2f for \n%s \n%s", distance, MATCH_THRESHOLD, - identifier.entityPosition(), + identifier.getEntityPosition(), closestEntity.getRedactionPositionsPerPage())); - return Optional.of(possibleEntities); + return Optional.empty(); } - return Optional.of(List.of(closestEntity)); + return Optional.of(closestEntity); + } + + + private static boolean entityIdentifierValueNotFound(List 
possibleEntities) { + + return possibleEntities == null || possibleEntities.isEmpty(); } @@ -218,78 +227,4 @@ public class CustomEntityCreationAdapter { + Math.abs(maxY1 - maxY2); } - - private record EntityIdentifier( - String value, - List entityPosition, - String ruleIdentifier, - String reason, - String legalBasis, - String type, - EntityType entityType, - boolean redacted, - boolean isDictionaryEntry, - boolean isDossierDictionaryEntry) { - - public static EntityIdentifier fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) { - - String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0"; - List rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); - return new EntityIdentifier(redactionLogEntry.getValue(), - rectangleWithPages, - ruleIdentifier, - redactionLogEntry.getReason(), - redactionLogEntry.getLegalBasis(), - redactionLogEntry.getType(), - redactionLogEntry.isRecommendation() ? 
EntityType.RECOMMENDATION : EntityType.ENTITY, - redactionLogEntry.isRedacted(), - redactionLogEntry.isDictionaryEntry(), - redactionLogEntry.isDossierDictionaryEntry()); - } - - - public static EntityIdentifier fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) { - - List rectangleWithPages = manualRedactionEntry.getPositions().stream().map(RectangleWithPage::fromAnnotationRectangle).toList(); - return new EntityIdentifier(manualRedactionEntry.getValue(), - rectangleWithPages, - "MAN.0.0", - manualRedactionEntry.getReason(), - manualRedactionEntry.getLegalBasis(), - manualRedactionEntry.getType(), - EntityType.ENTITY, - true, - false, - false); - } - - } - - private record RectangleWithPage(int pageNumber, Rectangle2D rectangle2D) { - - public static RectangleWithPage fromRedactionLogRectangle(Rectangle rectangle) { - - return new RectangleWithPage(rectangle.getPage(), toRectangle2D(rectangle)); - } - - - public static RectangleWithPage fromAnnotationRectangle(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rectangle) { - - return new RectangleWithPage(rectangle.getPage(), toRectangle2D(rectangle)); - } - - - private static Rectangle2D toRectangle2D(Rectangle rectangle) { - - return new Rectangle2D.Float(rectangle.getTopLeft().getX(), rectangle.getTopLeft().getY() + rectangle.getHeight(), rectangle.getWidth(), -rectangle.getHeight()); - } - - - private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rectangle) { - - return new Rectangle2D.Float(rectangle.getTopLeft().getX(), rectangle.getTopLeft().getY() + rectangle.getHeight(), rectangle.getWidth(), -rectangle.getHeight()); - } - - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java new file mode 100644 index 00000000..ec0d5dfe --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java @@ -0,0 +1,68 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.util.List; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.experimental.FieldDefaults; + +@Getter +@AllArgsConstructor +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public class EntityIdentifier { + + String value; + List entityPosition; + String ruleIdentifier; + String reason; + String legalBasis; + String type; + String section; + EntityType entityType; + boolean applied; + boolean isDictionaryEntry; + boolean isDossierDictionaryEntry; + + + public static EntityIdentifier fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) { + + String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0"; + List rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); + return new EntityIdentifier(redactionLogEntry.getValue(), + rectangleWithPages, + ruleIdentifier, + redactionLogEntry.getReason(), + redactionLogEntry.getLegalBasis(), + redactionLogEntry.getType(), + redactionLogEntry.getSection(), + redactionLogEntry.isRecommendation() ? 
EntityType.RECOMMENDATION : EntityType.ENTITY, + redactionLogEntry.isRedacted(), + redactionLogEntry.isDictionaryEntry(), + redactionLogEntry.isDossierDictionaryEntry()); + } + + + public static EntityIdentifier fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) { + + List rectangleWithPages = manualRedactionEntry.getPositions().stream().map(RectangleWithPage::fromAnnotationRectangle).toList(); + return new EntityIdentifier(manualRedactionEntry.getValue(), + rectangleWithPages, + "MAN.0.0", + manualRedactionEntry.getReason(), + manualRedactionEntry.getLegalBasis(), + manualRedactionEntry.getType(), manualRedactionEntry.getSection(), + EntityType.ENTITY, + true, + false, + false); + } + + + + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/RectangleWithPage.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/RectangleWithPage.java new file mode 100644 index 00000000..a27c23a2 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/RectangleWithPage.java @@ -0,0 +1,32 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.awt.geom.Rectangle2D; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; + +public record RectangleWithPage(int pageNumber, Rectangle2D rectangle2D) { + + public static RectangleWithPage fromRedactionLogRectangle(Rectangle rectangle) { + + return new RectangleWithPage(rectangle.getPage(), toRectangle2D(rectangle)); + } + + + public static RectangleWithPage fromAnnotationRectangle(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rectangle) { + + return new RectangleWithPage(rectangle.getPage(), toRectangle2D(rectangle)); + } + + + private static Rectangle2D 
toRectangle2D(Rectangle rectangle) { + + return new Rectangle2D.Float(rectangle.getTopLeft().getX(), rectangle.getTopLeft().getY() + rectangle.getHeight(), rectangle.getWidth(), -rectangle.getHeight()); + } + + + private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rectangle) { + + return new Rectangle2D.Float(rectangle.getTopLeft().getX(), rectangle.getTopLeft().getY() + rectangle.getHeight(), rectangle.getWidth(), -rectangle.getHeight()); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index b34b141d..49a53192 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -9,7 +9,6 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import org.kie.api.runtime.KieContainer; import org.springframework.stereotype.Service; import org.springframework.web.bind.annotation.RequestBody; @@ -30,6 +29,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; import 
com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrement; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion; @@ -80,19 +80,17 @@ public class AnalyzeService { log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId()); - log.info("Updated Rules to Version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + log.info("Updated Rules to Version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + + List notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document); - if (analyzeRequest.getManualRedactions() != null) { - entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document); - log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - } entityRedactionService.addDictionaryEntities(dictionary, document); log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); Set addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, wrapper.container(), analyzeRequest, nerEntities); log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - List redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId()); + List redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries); List legalBasis = 
legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); RedactionLog redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), @@ -111,7 +109,24 @@ public class AnalyzeService { true); redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries); - return finalizeAnalysis(analyzeRequest, startTime, redactionLog, document.getNumberOfPages(), dictionary.getVersion(), false, addedFileAttributes); + return finalizeAnalysis(analyzeRequest, + startTime, + redactionLog, + document.getNumberOfPages(), + dictionary.getVersion(), + false, + addedFileAttributes); + } + + + private List addManualRedactionEntriesAndReturnNotFoundEntries(AnalyzeRequest analyzeRequest, Document document) { + + List notFoundManualRedactionEntries = Collections.emptyList(); + if (analyzeRequest.getManualRedactions() != null) { + notFoundManualRedactionEntries = entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document); + log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + } + return notFoundManualRedactionEntries; } @@ -153,15 +168,9 @@ public class AnalyzeService { log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId()); - log.info("Updated Rules to version {} for file {} in dossier {}", - wrapper.rulesVersion(), - analyzeRequest.getFileId(), - analyzeRequest.getDossierId()); + log.info("Updated Rules to version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - if (analyzeRequest.getManualRedactions() != null) { - entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document); - log.info("Added Manual redaction entries for file {} in 
dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - } + List notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document); Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); @@ -169,10 +178,15 @@ public class AnalyzeService { sectionsToReAnalyse.forEach(node -> entityRedactionService.addDictionaryEntities(dictionary, node)); log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - Set addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, sectionsToReAnalyse, wrapper.container(), analyzeRequest, nerEntities); + Set addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, + document, + sectionsToReAnalyse, + wrapper.container(), + analyzeRequest, + nerEntities); log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - List newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId()); + List newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries); var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), @@ -303,4 +317,5 @@ public class AnalyzeService { })); } } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index ed195da6..765022c3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -18,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNo import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation; @@ -73,10 +74,10 @@ public class EntityRedactionService { return allFileAttributes.stream().filter(fileAttribute -> !analyzeRequest.getFileAttributes().contains(fileAttribute)).collect(Collectors.toUnmodifiableSet()); } - public void addManualAddRedactionEntities(Set manualRedactionEntries, Document document) { + public List addManualAddRedactionEntities(Set manualRedactionEntries, Document document) { // Entities are automatically added to the DocumentGraph and don't need to be inserted again. 
- customEntityCreationAdapter.createRedactionEntities(manualRedactionEntries, document); + return customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(manualRedactionEntries, document); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index cbec7fde..1b11f3dc 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -1,6 +1,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Locale; @@ -17,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -29,7 +31,7 @@ public class RedactionLogCreatorService { private final DictionaryService dictionaryService; - public List createRedactionLog(Document document, String dossierTemplateId) { + public List createRedactionLog(Document document, String dossierTemplateId, List notFoundManualRedactionEntries) { List entries = new ArrayList<>(); Set processedIds = new HashSet<>(); @@ 
-39,6 +41,7 @@ public class RedactionLogCreatorService { .filter(entity -> !entity.isRemoved()) .forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, processedIds, dossierTemplateId))); document.streamAllImages().filter(image -> !image.isRemoved()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId))); + notFoundManualRedactionEntries.forEach(entityIdentifier -> entries.add(createRedactionLogEntry(entityIdentifier, dossierTemplateId))); return entries; } @@ -109,6 +112,31 @@ public class RedactionLogCreatorService { .build(); } + public RedactionLogEntry createRedactionLogEntry(EntityIdentifier entityIdentifier, String dossierTemplateId) { + + return RedactionLogEntry.builder() + .color(getColor(entityIdentifier.getType(), dossierTemplateId, entityIdentifier.isApplied())) + .reason(entityIdentifier.getReason()) + .legalBasis(entityIdentifier.getLegalBasis()) + .value(entityIdentifier.getValue()) + .type(entityIdentifier.getType()) + .redacted(entityIdentifier.isApplied()) + .isHint(isHint(entityIdentifier.getType(), dossierTemplateId)) + .isRecommendation(entityIdentifier.getEntityType().equals(EntityType.RECOMMENDATION)) + .isFalsePositive(entityIdentifier.getEntityType().equals(EntityType.FALSE_POSITIVE) || entityIdentifier.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) + .section(entityIdentifier.getSection()) + .sectionNumber(0) + .matchedRule("ManualRedaction") + .isDictionaryEntry(entityIdentifier.isDictionaryEntry()) + .textAfter("") + .textBefore("") + .startOffset(-1) + .endOffset(-1) + .isDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry()) + .engines(Collections.emptySet()) + .reference(Collections.emptySet()) + .build(); + } public RedactionLogEntry createRedactionLogEntry(Image image, String dossierTemplateId) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualRedactionEntryTest.java 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualRedactionEntryTest.java
new file mode 100644
index 00000000..13f3674e
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualRedactionEntryTest.java
@@ -0,0 +1,157 @@
+package com.iqser.red.service.redaction.v1.server.document.graph;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.when;
+
+import java.awt.geom.Rectangle2D;
+import java.util.List;
+import java.util.Set;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.MockitoAnnotations;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.mock.mockito.MockBean;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
+import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
+import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
+import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
+import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
+import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
+import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
+import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
+import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
+
+import lombok.SneakyThrows;
+
+/**
+ * Integration tests for turning {@link ManualRedactionEntry} objects into entities of the document graph
+ * via {@link CustomEntityCreationAdapter}.
+ */
+public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
+
+    @Autowired
+    private EntityEnrichmentService entityEnrichmentService;
+
+    @Autowired
+    private CustomEntityCreationAdapter customEntityCreationAdapter;
+
+    @Autowired
+    private RedactionLogCreatorService redactionLogCreatorService;
+
+    @MockBean
+    private DictionaryService dictionaryService;
+
+
+    /** Stubs the color lookup for the author dictionary type. */
+    @BeforeEach
+    public void stubMethods() {
+
+        MockitoAnnotations.openMocks(this);
+        when(dictionaryService.getColor(DICTIONARY_AUTHOR, TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new float[]{0f, 0f, 0f});
+    }
+
+
+    /**
+     * A manual entry built from the value and positions of an entity found in the document is expected
+     * to be re-created as exactly one entity, with nothing reported as not found.
+     */
+    @Test
+    @SneakyThrows
+    public void manualAddRedactionTest() {
+
+        Document document = buildGraph("files/new/VV-919901.pdf");
+        EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
+
+        List<RedactionEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList();
+        assertFalse(tempEntities.isEmpty());
+        var tempEntity = tempEntities.get(0);
+        List<Rectangle> positions = tempEntity.getRedactionPositionsPerPage()
+                .stream()
+                .flatMap(redactionPosition -> redactionPosition.getRectanglePerLine()
+                        .stream()
+                        .map(rectangle2D -> toAnnotationRectangle(rectangle2D, redactionPosition.getPage().getNumber())))
+                .toList();
+
+        ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
+                .type("manual")
+                .value(tempEntity.getValue())
+                .reason("reason")
+                .legalBasis("n-a")
+                .section(tempEntity.getDeepestFullyContainingNode().toString())
+                .rectangle(true)
+                .positions(positions)
+                .textAfter("")
+                .textBefore("")
+                .build();
+
+        // Remove the temporary entity so the graph is empty before the adapter runs.
+        tempEntity.removeFromGraph();
+        assertTrue(document.getEntities().isEmpty());
+
+        List<EntityIdentifier> notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry),
+                document);
+        assertTrue(notFoundEntityIdentifiers.isEmpty());
+        assertEquals(1, document.getEntities().size());
+    }
+
+
+    /**
+     * A manual entry that the adapter reports as not found is expected to add no entity to the graph
+     * and still to produce one redaction log entry carrying the entry's value and type.
+     */
+    @Test
+    @SneakyThrows
+    public void manualAddRedactionFailingTest() {
+
+        Document document = buildGraph("files/new/VV-919901.pdf");
+        // This is important due to PDFTron Web Viewer reordering the content, such that this string is selectable.
+        String value = "To: Syngenta Ltd. Jealott’s Hill";
+        String type = DICTIONARY_AUTHOR;
+        ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
+                .type(type)
+                .value(value)
+                .reason("reason")
+                .legalBasis("n-a")
+                .section("n-a")
+                .rectangle(true)
+                .positions(List.of(new Rectangle(new Point(90, 262), 11, 88, 1), new Rectangle(new Point(90, 247), 11, 131, 1)))
+                .textAfter("")
+                .textBefore("")
+                .build();
+
+        assertTrue(document.getEntities().isEmpty());
+
+        List<EntityIdentifier> notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry),
+                document);
+        assertEquals(1, notFoundEntityIdentifiers.size());
+        assertTrue(document.getEntities().isEmpty());
+
+        List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundEntityIdentifiers);
+
+        assertEquals(1, redactionLogEntries.size());
+        assertEquals(value, redactionLogEntries.get(0).getValue());
+        assertEquals(type, redactionLogEntries.get(0).getType());
+    }
+
+
+    /**
+     * Converts a {@link Rectangle2D} into an annotation {@link Rectangle} on the given page. The point passed
+     * is (minX, minY + height) and the height is passed negated.
+     */
+    public static Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
+
+        return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
+                (float) rectangle2D.getWidth(),
+                -(float) rectangle2D.getHeight(),
+                pageNumber);
+    }
+
+}
diff --git
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java index f43bbacd..46bace94 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java @@ -26,6 +26,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.tenantcommons.TenantContext; @@ -85,9 +86,9 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest { // IMPORTANT: always use the graph which is mapped from the DocumentData, since rounding errors occur during storage. 
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(request.getDossierId(), request.getFileId())); - redactionLogAdapter.toRedactionEntity(originalRedactionLog, document); + List notFoundManualRedactionEntries = redactionLogAdapter.toRedactionEntity(originalRedactionLog, document); - var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID); + var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries); Map migratedIds = migratedRedactionLogEntries.stream().collect(toMap(RedactionLogEntry::getId, Functions.identity())); Map newIds = newRedactionLog.getRedactionLogEntry().stream().collect(toMap(RedactionLogEntry::getId, Functions.identity()));