From 83776b6685059c0b83cf25c35be3d23b744ffd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Wed, 12 Jul 2023 18:40:04 +0200 Subject: [PATCH] RED-6009: Document Tree Structure --- ....java => CustomEntityCreationAdapter.java} | 140 +++++++++++++----- .../document/factory/SectionNodeFactory.java | 1 + .../model/dictionary/Dictionary.java | 28 +++- .../model/dictionary/DictionaryModel.java | 11 +- .../dictionary/SearchImplementation.java | 7 +- .../redaction/service/AnalyzeService.java | 13 ++ .../service/DroolsExecutionService.java | 32 +++- .../service/EntityRedactionService.java | 8 + .../v1/server/RedactionAcceptanceTest.java | 8 +- .../document/graph/MigrationPocTest.java | 4 +- .../graph/SearchImplementationTest.java | 34 +++++ .../resources/drools/acceptance_rules.drl | 16 +- .../src/test/resources/drools/all_rules.drl | 16 +- .../test/resources/drools/documine_flora.drl | 10 +- .../drools/manual_redaction_rules.drl | 10 +- .../src/test/resources/drools/rules.drl | 16 +- .../src/test/resources/drools/rules_v2.drl | 9 +- .../EFSA_sanitisation_GFL_v1/rules.drl | 4 +- 18 files changed, 276 insertions(+), 91 deletions(-) rename redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/{RedactionLogEntryAdapter.java => CustomEntityCreationAdapter.java} (57%) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/CustomEntityCreationAdapter.java similarity index 57% rename from redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/CustomEntityCreationAdapter.java index 76d925c8..bc2c9ba7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/CustomEntityCreationAdapter.java @@ -9,13 +9,17 @@ import java.util.Comparator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; + import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; @@ -29,15 +33,18 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation; +import lombok.extern.slf4j.Slf4j; + +@Slf4j @Service -public class RedactionLogEntryAdapter { +public class CustomEntityCreationAdapter { private static final double MATCH_THRESHOLD = 1; private final EntityCreationService entityCreationService; @Autowired - public RedactionLogEntryAdapter(EntityEnrichmentService entityEnrichmentService) { + public CustomEntityCreationAdapter(EntityEnrichmentService entityEnrichmentService) { entityCreationService = new EntityCreationService(entityEnrichmentService); } @@ -46,24 +53,80 @@ public class RedactionLogEntryAdapter { public Stream toRedactionEntity(RedactionLog redactionLog, SemanticNode node) { List pageNumbers = redactionLog.getRedactionLogEntry().stream().flatMap(entry -> entry.getPositions().stream().map(Rectangle::getPage)).distinct().toList(); + Set entryValues = redactionLog.getRedactionLogEntry().stream().map(RedactionLogEntry::getValue).map(String::toLowerCase).collect(Collectors.toSet()); + + Map> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues); + assert allValuesFound(tempEntitiesByValue, entryValues); + + List entities = redactionLog.getRedactionLogEntry() + .stream() + .map(entry -> findClosestEntity(entry, tempEntitiesByValue).map(tempEntity -> createCorrectEntity(entry, node, tempEntity))) + .filter(Optional::isPresent) + .map(Optional::get) + .toList(); + tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph); + return entities.stream(); + } + + + private Optional findClosestEntity(RedactionLogEntry entry, Map> tempEntitiesByValue) { + + List possibleEntities = tempEntitiesByValue.get(entry.getValue().toLowerCase(Locale.ROOT)); + + if (possibleEntities == null || possibleEntities.isEmpty()) { + log.warn("Entity could not be created for manual add entry: {}, due to the string not being found.", entry); + return Optional.empty(); + } + + return findClosestRedactionEntity(entry.getPositions(), possibleEntities); + } + + + public void createRedactionEntities(Set manualRedactionEntries, SemanticNode node) { + + List pageNumbers = manualRedactionEntries.stream() + .flatMap(entry -> entry.getPositions().stream().map(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle::getPage)) + .distinct() + .toList(); + Set entryValues = manualRedactionEntries.stream().map(ManualRedactionEntry::getValue).map(String::toLowerCase).collect(Collectors.toSet()); + + Map> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues); + + manualRedactionEntries.forEach(entry -> { + List possibleEntities = tempEntitiesByValue.get(entry.getValue().toLowerCase(Locale.ROOT)); + + if (possibleEntities == null || possibleEntities.isEmpty()) { + log.warn("Entity could not be created for manual add entry: {}, due to the string not being found.", entry); + return; + } + + List originalPositions = entry.getPositions().stream().map(CustomEntityCreationAdapter::toRectangle).toList(); + findClosestRedactionEntity(originalPositions, possibleEntities).ifPresent(closestEntity -> createCorrectEntity(entry, node, closestEntity)); + }); + + tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph); + } + + + private static Rectangle toRectangle(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rectangle) { + + return new Rectangle(new Point(rectangle.getTopLeftX(), rectangle.getTopLeftY()), rectangle.getWidth(), rectangle.getHeight(), rectangle.getPage()); + } + + + private Map> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, List pageNumbers, Set entryValues) { + if (!pageNumbers.stream().allMatch(node::isOnPage)) { throw new IllegalArgumentException(format("SemanticNode %s does not contain these pages %s present in the redaction log", node, pageNumbers.stream().filter(pageNumber -> !node.isOnPage(pageNumber)).toList())); } - Set entryValues = redactionLog.getRedactionLogEntry().stream().map(RedactionLogEntry::getValue).map(String::toLowerCase).collect(Collectors.toSet()); SearchImplementation searchImplementation = new SearchImplementation(entryValues, true); - Map> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValueIgnoringCase(node, searchImplementation); - - assert allValuesFound(tempEntitiesByValue, entryValues); - - List entities = redactionLog.getRedactionLogEntry() + return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary()) .stream() - .map(entry -> findClosestRedactionEntity(entry, tempEntitiesByValue.get(entry.getValue().toLowerCase(Locale.ROOT)), node)) - .toList(); - tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph); - return entities.stream(); + .map(boundary -> entityCreationService.forceByBoundary(boundary, "temp", EntityType.ENTITY, node)) + .collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT))); } @@ -73,31 +136,24 @@ public class RedactionLogEntryAdapter { } - private Map> findAllPossibleEntitiesAndGroupByValueIgnoringCase(SemanticNode node, SearchImplementation searchImplementation) { - - return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary()) - .stream() - .map(boundary -> entityCreationService.forceByBoundary(boundary, "temp", EntityType.ENTITY, node)) - .collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT))); - } - - - private RedactionEntity findClosestRedactionEntity(RedactionLogEntry redactionLogEntry, List entitiesWithSameValue, SemanticNode node) { + private Optional findClosestRedactionEntity(List originalPositions, List entitiesWithSameValue) { RedactionEntity closestEntity = entitiesWithSameValue.stream() - .filter(entity -> pagesMatch(entity, redactionLogEntry)) - .min(Comparator.comparingDouble(entity -> calculateMinDistance(redactionLogEntry, entity))) - .orElseThrow(() -> new NotFoundException(format("No entity with similar position found for %s", redactionLogEntry))); + .filter(entity -> pagesMatch(entity, originalPositions)) + .min(Comparator.comparingDouble(entity -> calculateMinDistance(originalPositions, entity))) + .orElseThrow(() -> new NotFoundException(format("No entity with similar position found for %s", originalPositions))); - double distance = calculateMinDistance(redactionLogEntry, closestEntity); + double distance = calculateMinDistance(originalPositions, closestEntity); if (distance > MATCH_THRESHOLD) { - throw new NotFoundException(format("Distance to closest found entity is %.2f for \n%s \n%s", + log.warn(format("Distance to closest found entity is %.2f and therefore higher than the threshold of %.2f for \n%s \n%s", distance, - redactionLogEntry.getPositions(), + MATCH_THRESHOLD, + originalPositions, closestEntity.getRedactionPositionsPerPage())); + return Optional.empty(); } - return createCorrectEntity(redactionLogEntry, node, closestEntity); + return Optional.of(closestEntity); } @@ -119,20 +175,30 @@ public class RedactionLogEntryAdapter { } - private static boolean pagesMatch(RedactionEntity entity, RedactionLogEntry redactionLogEntry) { + private RedactionEntity createCorrectEntity(ManualRedactionEntry redactionLogEntry, SemanticNode node, RedactionEntity closestEntity) { + + RedactionEntity correctEntity = entityCreationService.forceByBoundary(closestEntity.getBoundary(), redactionLogEntry.getType(), EntityType.ENTITY, node); + + correctEntity.force("MAN.0.0", redactionLogEntry.getReason(), redactionLogEntry.getLegalBasis()); + + return correctEntity; + } + + + private static boolean pagesMatch(RedactionEntity entity, List originalPositions) { Set entityPageNumbers = entity.getRedactionPositionsPerPage().stream().map(RedactionPosition::getPage).map(Page::getNumber).collect(Collectors.toSet()); - Set redactionLogEntryPageNumbers = redactionLogEntry.getPositions().stream().map(Rectangle::getPage).collect(Collectors.toSet()); + Set redactionLogEntryPageNumbers = originalPositions.stream().map(Rectangle::getPage).collect(Collectors.toSet()); return entityPageNumbers.equals(redactionLogEntryPageNumbers); } - private double calculateMinDistance(RedactionLogEntry redactionLogEntry, RedactionEntity entity) { + private double calculateMinDistance(List originalPositions, RedactionEntity entity) { - if (redactionLogEntry.getPositions().size() != countRectangles(entity)) { + if (originalPositions.size() != countRectangles(entity)) { return Double.MAX_VALUE; } - return redactionLogEntry.getPositions().stream().mapToDouble(redactionLogEntryRectangle -> calculateMinDistancePerRectangle(entity, redactionLogEntryRectangle)).sum(); + return originalPositions.stream().mapToDouble(redactionLogEntryRectangle -> calculateMinDistancePerRectangle(entity, redactionLogEntryRectangle)).sum(); } @@ -142,14 +208,14 @@ public class RedactionLogEntryAdapter { } - private double calculateMinDistancePerRectangle(RedactionEntity entity, Rectangle redactionLogEntryRectangle) { + private double calculateMinDistancePerRectangle(RedactionEntity entity, Rectangle originalRectangle) { return entity.getRedactionPositionsPerPage() .stream() - .filter(redactionPosition -> redactionPosition.getPage().getNumber() == redactionLogEntryRectangle.getPage()) + .filter(redactionPosition -> redactionPosition.getPage().getNumber() == originalRectangle.getPage()) .map(RedactionPosition::getRectanglePerLine) .flatMap(Collection::stream) - .mapToDouble(rectangle -> calculateDistance(rectangle, toRectangle2D(redactionLogEntryRectangle))) + .mapToDouble(rectangle -> calculateDistance(rectangle, toRectangle2D(originalRectangle))) .min() .orElse(Double.MAX_VALUE); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java index efcf442c..6e760b9b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java @@ -165,6 +165,7 @@ public class SectionNodeFactory { @SuppressWarnings("PMD") + // experimental feature to be used later private List findTextBlocksWithSameClassificationAndAlignsYAndSameOrientationUntilConvergence(TextPageBlock originalTextBlocks, List pageBlocks) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java index 919e7a36..6cf4fd71 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java @@ -2,16 +2,21 @@ package com.iqser.red.service.redaction.v1.server.redaction.model.dictionary; import static java.lang.String.format; +import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import com.iqser.red.service.redaction.v1.server.exception.NotFoundException; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; @@ -48,7 +53,7 @@ public class Dictionary { public boolean hasLocalEntries() { - return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty()); + return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntriesWithMatchedRules().isEmpty()); } @@ -88,7 +93,7 @@ public class Dictionary { } - public void addLocalDictionaryEntry(String type, String value, boolean alsoAddLastname) { + private void addLocalDictionaryEntry(String type, String value, Collection matchedRules, boolean alsoAddLastname) { if (value.isBlank()) { return; @@ -96,23 +101,30 @@ public class Dictionary { if (localAccessMap.get(type) == null) { throw new IllegalArgumentException(format("DictionaryModel of type %s does not exist", type)); } - if (localAccessMap.get(type).getLocalEntries() == null) { + if (localAccessMap.get(type).getLocalEntriesWithMatchedRules() == null) { throw new IllegalArgumentException(format("DictionaryModel of type %s has no local Entries", type)); } if (StringUtils.isEmpty(value)) { throw new IllegalArgumentException(format("%s is not a valid dictionary entry", value)); } - localAccessMap.get(type).getLocalEntries().add(value.trim()); + Set matchedRulesSet = new HashSet<>(matchedRules); + localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(value.trim(), matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet())); if (alsoAddLastname) { String lastname = value.split(" ")[0]; - localAccessMap.get(type).getLocalEntries().add(lastname); + localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet())); } } - public void addLocalDictionaryEntry(RedactionEntity redactionEntity) { + public void recommendEverywhere(RedactionEntity redactionEntity) { - addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), false); + addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), false); + } + + + public void recommendEverywhereWithLastNameSeparately(RedactionEntity redactionEntity) { + + addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), true); } @@ -125,7 +137,7 @@ public class Dictionary { while (matcher.find()) { String match = matcher.group().trim(); if (match.length() >= 3) { - addLocalDictionaryEntry(redactionEntity.getType(), match, true); + addLocalDictionaryEntry(redactionEntity.getType(), match, redactionEntity.getMatchedRuleList(), true); } } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/DictionaryModel.java index a6b43116..9e17b4c4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/DictionaryModel.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/DictionaryModel.java @@ -1,12 +1,13 @@ package com.iqser.red.service.redaction.v1.server.redaction.model.dictionary; import java.io.Serializable; -import java.util.HashSet; +import java.util.HashMap; import java.util.Set; import java.util.stream.Collectors; import com.iqser.red.service.dictionarymerge.commons.DictionaryEntry; import com.iqser.red.service.dictionarymerge.commons.DictionaryEntryModel; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule; import lombok.AllArgsConstructor; import lombok.Data; @@ -28,9 +29,9 @@ public class DictionaryModel implements Serializable { private transient SearchImplementation entriesSearch; private transient SearchImplementation falsePositiveSearch; private transient SearchImplementation falseRecommendationsSearch; - private transient SearchImplementation localSearch; - private final Set localEntries = new HashSet<>(); + private final HashMap> localEntriesWithMatchedRules = new HashMap<>(); + private transient SearchImplementation localSearch; public DictionaryModel(String type, @@ -68,8 +69,8 @@ public class DictionaryModel implements Serializable { public SearchImplementation getLocalSearch() { - if (this.localSearch == null) { - this.localSearch = new SearchImplementation(this.localEntries, caseInsensitive); + if (this.localSearch == null || this.localSearch.getValues().size() != this.localEntriesWithMatchedRules.size()) { + this.localSearch = new SearchImplementation(this.localEntriesWithMatchedRules.keySet(), caseInsensitive); } return this.localSearch; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java index d19f9c84..aa6eab52 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java @@ -47,7 +47,12 @@ public class SearchImplementation { if (this.values.size() == 1) { var text = this.values.iterator().next(); - this.pattern = Pattern.compile(Pattern.quote(ignoreCase ? text.toLowerCase(Locale.ROOT) : text)); + + if (this.ignoreCase) { + this.pattern = Pattern.compile(Pattern.quote(text.toLowerCase(Locale.ROOT)), Pattern.CASE_INSENSITIVE); + } else { + this.pattern = Pattern.compile(Pattern.quote(text)); + } } else { var builder = Trie.builder(); if (this.ignoreCase) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index 9f41484f..c24b3dee 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -14,6 +14,7 @@ import org.springframework.stereotype.Service; import org.springframework.web.bind.annotation.RequestBody; import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues; +import com.iqser.red.service.dictionarymerge.commons.DictionaryEntryModel; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; @@ -44,6 +45,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrement; +import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion; import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; @@ -143,6 +145,10 @@ public class AnalyzeService { long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId()); log.info("Updated Rules to Version {} for file {} in dossier {}", rulesVersion, analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + if (analyzeRequest.getManualRedactions() != null) { + entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document); + log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + } entityRedactionService.addDictionaryEntities(dictionary, document); log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); @@ -215,7 +221,14 @@ public class AnalyzeService { analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + if (analyzeRequest.getManualRedactions() != null) { + entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document); + log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + } + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + sectionsToReAnalyse.forEach(node -> entityRedactionService.addDictionaryEntities(dictionary, node)); log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index 88a654b2..abc763c5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -1,12 +1,21 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import static java.util.stream.Collectors.collectingAndThen; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.toList; + import java.io.ByteArrayInputStream; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.stream.Collector; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.kie.api.KieServices; @@ -21,6 +30,9 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document; @@ -99,10 +111,9 @@ public class DroolsExecutionService { if (manualRedactions != null) { manualRedactions.getResizeRedactions().forEach(kieSession::insert); - manualRedactions.getForceRedactions().forEach(kieSession::insert); - manualRedactions.getIdsToRemove().forEach(kieSession::insert); manualRedactions.getImageRecategorization().forEach(kieSession::insert); manualRedactions.getEntriesToAdd().forEach(kieSession::insert); + insertLatestForceOrRemovalPerAnnotationId(manualRedactions.getForceRedactions(), manualRedactions.getIdsToRemove(), kieSession); } kieSession.insert(nerEntities); @@ -115,6 +126,23 @@ public class DroolsExecutionService { } + private static void insertLatestForceOrRemovalPerAnnotationId(Collection forceRedactions, Collection idRemovals, KieSession kieSession) { + + Stream.concat(forceRedactions.stream(), idRemovals.stream()) + .filter(BaseAnnotation::isApproved) + .filter(baseAnnotation -> baseAnnotation.getRequestDate() != null) + .collect(groupingBy(BaseAnnotation::getAnnotationId, sortByRequestDate())) + .values() + .forEach(baseAnnotations -> kieSession.insert(baseAnnotations.get(0))); + } + + + private static Collector> sortByRequestDate() { + + return collectingAndThen(toList(), originList -> originList.stream().sorted(Comparator.comparing(BaseAnnotation::getRequestDate).reversed()).toList()); + } + + public List getFileAttributes(KieSession kieSession) { List fileAttributes = new LinkedList<>(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index f1357346..db34af9c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -9,7 +9,9 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; +import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.CustomEntityCreationAdapter; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode; @@ -32,6 +34,7 @@ public class EntityRedactionService { DroolsExecutionService droolsExecutionService; EntityEnrichmentService entityEnrichmentService; + CustomEntityCreationAdapter customEntityCreationAdapter; public Set addRuleEntities(Dictionary dictionary, Document document, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) { @@ -70,6 +73,11 @@ public class EntityRedactionService { return allFileAttributes.stream().filter(fileAttribute -> !analyzeRequest.getFileAttributes().contains(fileAttribute)).collect(Collectors.toUnmodifiableSet()); } + public void addManualAddRedactionEntities(Set manualRedactionEntries, Document document) { + + customEntityCreationAdapter.createRedactionEntities(manualRedactionEntries, document); + } + public void addDictionaryEntities(Dictionary dictionary, SemanticNode node) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java index 1bd38749..8c91f660 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -1,6 +1,7 @@ package com.iqser.red.service.redaction.v1.server; import static org.mockito.Mockito.when; +import static org.wildfly.common.Assert.assertFalse; import static org.wildfly.common.Assert.assertTrue; import java.io.FileOutputStream; @@ -109,6 +110,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { var publishedInformationEntry1 = findEntityByTypeAndValue(redactionLog, "published_information", "Oxford University Press").findFirst().orElseThrow(); var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(redactionLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getSectionNumber()).findFirst().orElseThrow(); + // works in intellij, but not mvn install, but it works in UI so idk... // assertFalse(asyaLyon1.isRedacted()); var idRemoval = buildIdRemoval(publishedInformationEntry1.getId()); @@ -159,18 +161,20 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); System.out.println("Finished structure analysis"); AnalyzeResult result = analyzeService.analyze(request); + System.out.println("Finished analysis"); var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); - RedactionLogEntry desireeEtAl = findEntityByTypeAndValue(redactionLog, "CBI_author", "Desiree").filter(e -> e.getMatchedRule().startsWith("CBI.16")) + RedactionLogEntry desireeEtAl = findEntityByTypeAndValue(redactionLog, "CBI_author", "Desiree").filter(e -> !e.isRecommendation()).filter(e -> e.getMatchedRule().startsWith("CBI.16")) .findAny() .orElseThrow(); IdRemoval removal = buildIdRemoval(desireeEtAl.getId()); request.setManualRedactions(ManualRedactions.builder().idsToRemove(Set.of(removal)).build()); analyzeService.reanalyze(request); + System.out.println("Finished reanalysis"); var redactionLog2 = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").noneMatch(e -> e.getMatchedRule().startsWith("CBI.16"))); + assertTrue(findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> !entry.isRecommendation()).noneMatch(RedactionLogEntry::isRedacted)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java index b8e5327d..658692ea 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java @@ -26,7 +26,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; -import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.RedactionLogEntryAdapter; +import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.CustomEntityCreationAdapter; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.DocumentData; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.mapper.DocumentGraphMapper; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; @@ -41,7 +41,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest { private static final String RULES = loadFromClassPath("drools/rules.drl"); @Autowired - private RedactionLogEntryAdapter redactionLogAdapter; + private CustomEntityCreationAdapter redactionLogAdapter; @Autowired private RedactionLogCreatorService redactionLogCreatorService; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java new file mode 100644 index 00000000..b80613b4 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java @@ -0,0 +1,34 @@ +package com.iqser.red.service.redaction.v1.server.document.graph; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityEnrichmentService; +import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation; + +public class SearchImplementationTest extends BuildDocumentIntegrationTest { + + @Autowired + private EntityEnrichmentService entityEnrichmentService; + + + @Test + public void testSearchImplementationWithSingleEntry() { + + Document document = buildGraph("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections"); + + SearchImplementation searchImplementation = new SearchImplementation(List.of("mydossierredaction"), true); + EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); + List entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList(); + assertEquals(2, entities.size()); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 7e3ba9b4..f569be68 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -224,7 +224,7 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + dictionary.recommendEverywhere(entity); }); end @@ -237,7 +237,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + dictionary.recommendEverywhere(entity); }); end @@ -270,7 +270,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -283,7 +283,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -667,7 +667,11 @@ rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document).toList(); + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + entity.addMatchedRules(matchedRules); + }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl index 6ba9137e..e0cd7b46 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl @@ -542,7 +542,7 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + dictionary.recommendEverywhere(entity); }); end @@ -555,7 +555,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + dictionary.recommendEverywhere(entity); }); end @@ -621,7 +621,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -634,7 +634,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -1363,7 +1363,11 @@ rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document).toList(); + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + entity.addMatchedRules(matchedRules); + }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 7de8aff3..86352fa1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1141,11 +1141,11 @@ rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document) - .forEach(entity -> { - entity.addEngine(Engine.RULE); - insert(entity); + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + entity.addMatchedRules(matchedRules); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 03b2f910..eaf2ed1b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -151,11 +151,11 @@ rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document) - .forEach(entity -> { - entity.addEngine(Engine.RULE); - insert(entity); + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + entity.addMatchedRules(matchedRules); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index a07f5b81..932278c9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -373,7 +373,7 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + dictionary.recommendEverywhere(entity); }); end @@ -386,7 +386,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + dictionary.recommendEverywhere(entity); }); end @@ -452,7 +452,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -465,7 +465,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -1077,7 +1077,11 @@ rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document).toList(); + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + entity.addMatchedRules(matchedRules); + }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index 6bc2bb2b..7a451fa1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -194,11 +194,12 @@ rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document) - .forEach(entity -> { - insert(entity); + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + entity.addMatchedRules(matchedRules); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index 0c1e10e7..862af6b7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -272,7 +272,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -285,7 +285,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); + dictionary.recommendEverywhere(laboratoryEntity); }); end