From b78f6d2d0f2b23028b24601935f09f01012def7d Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 12 Oct 2023 12:30:43 +0200 Subject: [PATCH] DM-483: endless loops with false positive * fix byNerEntity always creating a new Entity and therefore enabling endless loops Signed-off-by: Kilian Schuettler --- .../document/EntityCreationService.java | 149 ++++++++++-------- .../v1/server/DocumineFloraTest.java | 8 + 2 files changed, 92 insertions(+), 65 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index cff4eb6d..b772288c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -36,6 +36,7 @@ import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation; import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; +import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -250,20 +251,66 @@ public class EntityCreationService { public Stream bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) { - return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) - .stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary)) + return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()).stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary)) .map(bounds -> byTextRange(bounds, type, entityType, node)) .filter(Optional::isPresent) .map(Optional::get); } + /** + * Creates a redaction entity based on the given boundary, type, entity type, and semantic node. + * If the document already contains an equal redaction entity, then the original Entity is returned. + * Also inserts the Entity into the kieSession. + * + * @param textRange The boundary of the redaction entity. + * @param type The type of the redaction entity. + * @param entityType The entity type of the redaction entity. + * @param node The semantic node to associate with the redaction entity. + * @return An Optional containing the redaction entity, or the previous entity if the entity already exists. + */ + public Optional byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) { + + return byTextRangeWithEngine(textRange, type, entityType, node, Set.of(Engine.RULE)); + } + + + /** + * Creates a redaction entity based on the given boundary, type, entity type, and semantic node. + * If the document already contains an equal redaction entity, then the original Entity is returned. + * Also inserts the Entity into the kieSession. + * + * @param textRange The boundary of the redaction entity. + * @param type The type of the redaction entity. + * @param entityType The entity type of the redaction entity. + * @param node The semantic node to associate with the redaction entity. + * @return An Optional containing the redaction entity, or the previous entity if the entity already exists. + */ + public Optional byTextRangeWithEngine(TextRange textRange, String type, EntityType entityType, SemanticNode node, Set engines) { + + if (!node.getTextRange().contains(textRange)) { + throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node)); + } + TextRange trimmedTextRange = textRange.trim(node.getTextBlock()); + TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType); + if (node.getEntities().contains(entity)) { + return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngines(engines)).findAny(); + } + addEntityToGraph(entity, node); + entity.addEngines(engines); + insertToKieSession(entity); + return Optional.of(entity); + } + + public Stream lineAfterStrings(List strings, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); SearchImplementation searchImplementation = new SearchImplementation(strings, false); return searchImplementation.getBoundaries(textBlock, node.getTextRange()) - .stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary)) + .stream() + .map(boundary -> toLineAfterTextRange(textBlock, boundary)) + .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) .map(boundary -> byTextRange(boundary, type, entityType, node)) .filter(Optional::isPresent) .map(Optional::get); @@ -275,29 +322,9 @@ public class EntityCreationService { TextBlock textBlock = node.getTextBlock(); SearchImplementation searchImplementation = new SearchImplementation(strings, true); return searchImplementation.getBoundaries(textBlock, node.getTextRange()) - .stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary)) - .map(boundary -> byTextRange(boundary, type, entityType, node)) - .filter(Optional::isPresent) - .map(Optional::get); - } - - - public Stream lineAfterString(String string, String type, EntityType entityType, SemanticNode node) { - - TextBlock textBlock = node.getTextBlock(); - return RedactionSearchUtility.findTextRangesByString(string, textBlock) - .stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary)) - .map(boundary -> byTextRange(boundary, type, entityType, node)) - .filter(Optional::isPresent) - .map(Optional::get); - } - - - public Stream lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) { - - TextBlock textBlock = node.getTextBlock(); - return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, textBlock) - .stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary)) + .stream() + .map(boundary -> toLineAfterTextRange(textBlock, boundary)) + .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) .map(boundary -> byTextRange(boundary, type, entityType, node)) .filter(Optional::isPresent) .map(Optional::get); @@ -520,31 +547,29 @@ public class EntityCreationService { } - /** - * Creates a redaction entity based on the given boundary, type, entity type, and semantic node. - * If the document already contains an equal redaction entity, then the original Entity is returned. - * Also inserts the Entity into the kieSession. - * - * @param textRange The boundary of the redaction entity. - * @param type The type of the redaction entity. - * @param entityType The entity type of the redaction entity. - * @param node The semantic node to associate with the redaction entity. - * @return An Optional containing the redaction entity, or the previous entity if the entity already exists. - */ - public Optional byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) { + public Stream lineAfterString(String string, String type, EntityType entityType, SemanticNode node) { - if (!node.getTextRange().contains(textRange)) { - throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node)); - } - TextRange trimmedTextRange = textRange.trim(node.getTextBlock()); - TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType); - if (node.getEntities().contains(entity)) { - return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngine(Engine.RULE)).findAny(); - } - addEntityToGraph(entity, node); - entity.addEngine(Engine.RULE); - insertToKieSession(entity); - return Optional.of(entity); + TextBlock textBlock = node.getTextBlock(); + return RedactionSearchUtility.findTextRangesByString(string, textBlock) + .stream() + .map(boundary -> toLineAfterTextRange(textBlock, boundary)) + .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) + .map(boundary -> byTextRange(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); + } + + + public Stream lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) { + + TextBlock textBlock = node.getTextBlock(); + return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, textBlock) + .stream() + .map(boundary -> toLineAfterTextRange(textBlock, boundary)) + .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) + .map(boundary -> byTextRange(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } @@ -600,15 +625,14 @@ public class EntityCreationService { return newEntity; } + public TextEntity copyEntityWithoutRules(TextEntity entity, String type, EntityType entityType, SemanticNode node) { - TextEntity newEntity = TextEntity.initialEntityNode(entity.getTextRange(), type, entityType); - newEntity.addEngines(entity.getEngines()); + TextEntity newEntity = byTextRangeWithEngine(entity.getTextRange(), type, entityType, node, entity.getEngines()).orElseThrow(() -> new NotFoundException( + "No entity present!")); newEntity.getManualOverwrite().addChanges(entity.getManualOverwrite().getManualChangeLog()); newEntity.setDictionaryEntry(entity.isDictionaryEntry()); newEntity.setDossierDictionaryEntry(entity.isDossierDictionaryEntry()); - addEntityToGraph(newEntity, node); - insertToKieSession(newEntity); return newEntity; } @@ -623,27 +647,22 @@ public class EntityCreationService { public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) { - var entity = forceByTextRange(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode); - entity.addEngine(Engine.NER); - insertToKieSession(entity); - return entity; + return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Engine.NER).orElseThrow(() -> new NotFoundException("No entity present!")); } public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) { - var entity = forceByTextRange(nerEntity.textRange(), type, entityType, semanticNode); - entity.addEngine(Engine.NER); - insertToKieSession(entity); - return entity; + return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Engine.NER).orElseThrow(() -> new NotFoundException("No entity present!")); } public Stream combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) { - return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).map(boundary -> forceByTextRange(boundary, type, entityType, semanticNode)) - .peek(entity -> entity.addEngine(Engine.NER)) - .peek(this::insertToKieSession); + return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) + .map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, Engine.NER)) + .filter(Optional::isPresent) + .map(Optional::get); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index fb8531b4..1550be69 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -44,6 +44,13 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_components.drl"); + @BeforeEach + public void setUpDictionaries() { + + loadDictionaryForTest(); + mockDictionaryCalls(0L); + } + @Test // @Disabled public void titleExtraction() throws IOException { @@ -55,6 +62,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request); + System.out.println("Finished structure analysis"); AnalyzeResult result = analyzeService.analyze(request); System.out.println("Finished analysis");