DM-483: endless loops with false positive

* fix byNerEntity always creating a new Entity and therefore enabling endless loops

Signed-off-by: Kilian Schuettler <kilian.schuettler@knecon.com>
This commit is contained in:
Kilian Schuettler 2023-10-12 12:30:43 +02:00
parent 37627aa79e
commit b78f6d2d0f
2 changed files with 92 additions and 65 deletions

View File

@ -36,6 +36,7 @@ import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -250,20 +251,66 @@ public class EntityCreationService {
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
.stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary))
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()).stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary))
.map(bounds -> byTextRange(bounds, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
/**
 * Rule-engine shortcut for {@code byTextRangeWithEngine}: forwards all arguments unchanged and passes
 * a set containing only {@code Engine.RULE} as the engines.
 *
 * @param textRange The boundary of the redaction entity.
 * @param type The type of the redaction entity.
 * @param entityType The entity type of the redaction entity.
 * @param node The semantic node to associate with the redaction entity.
 * @return The Optional returned by {@code byTextRangeWithEngine} for these arguments.
 */
public Optional<TextEntity> byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) {

    Set<Engine> ruleEngineOnly = Set.of(Engine.RULE);
    return byTextRangeWithEngine(textRange, type, entityType, node, ruleEngineOnly);
}
/**
 * Creates a redaction entity for the given boundary inside the given semantic node and registers the given engines on it.
 * The boundary is trimmed against the node's text block before the entity is built.
 * If the node already contains an equal entity, the engines are added to that existing entity and it is returned;
 * nothing is added to the graph or the kieSession in that case.
 * Otherwise the new entity is added to the graph, receives the engines and is inserted into the kieSession.
 *
 * @param textRange The boundary of the redaction entity.
 * @param type The type of the redaction entity.
 * @param entityType The entity type of the redaction entity.
 * @param node The semantic node to associate with the redaction entity.
 * @param engines The engines to add to the newly created or already existing entity.
 * @return An Optional containing the already existing equal entity, or the newly created entity.
 * @throws IllegalArgumentException if the given boundary is not contained in the text range of the node.
 */
public Optional<TextEntity> byTextRangeWithEngine(TextRange textRange, String type, EntityType entityType, SemanticNode node, Set<Engine> engines) {

    if (!node.getTextRange().contains(textRange)) {
        throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
    }
    TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
    TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
    if (node.getEntities().contains(entity)) {
        Optional<TextEntity> existingEntity = node.getEntities().stream().filter(entity::equals).findAny();
        // Register the engines explicitly; Stream.peek is not guaranteed to run its action.
        existingEntity.ifPresent(existing -> existing.addEngines(engines));
        return existingEntity;
    }
    addEntityToGraph(entity, node);
    entity.addEngines(engines);
    insertToKieSession(entity);
    return Optional.of(entity);
}
public Stream<TextEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, false);
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.stream()
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
@ -275,29 +322,9 @@ public class EntityCreationService {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, true);
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
/**
 * Creates entities from the occurrences of the given string in the node's text block.
 * Each range found by {@code RedactionSearchUtility.findTextRangesByString} is converted with
 * {@code toLineAfterTextRange}, ranges rejected by {@code isValidEntityTextRange} are dropped, and the
 * remaining ranges are passed to {@code byTextRange}; empty results are filtered out.
 * NOTE(review): judging by its name, toLineAfterTextRange yields the rest of the line after the match - confirm.
 *
 * @param string The string to search for.
 * @param type The type of the entities to create.
 * @param entityType The entity type of the entities to create.
 * @param node The semantic node to search in and to associate the entities with.
 * @return A stream of the entities returned by {@code byTextRange}.
 */
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findTextRangesByString(string, textBlock)
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, textBlock)
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.stream()
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
@ -520,31 +547,29 @@ public class EntityCreationService {
}
/**
* Creates a redaction entity based on the given boundary, type, entity type, and semantic node.
* If the document already contains an equal redaction entity, then the original Entity is returned.
* Also inserts the Entity into the kieSession.
*
* @param textRange The boundary of the redaction entity.
* @param type The type of the redaction entity.
* @param entityType The entity type of the redaction entity.
* @param node The semantic node to associate with the redaction entity.
* @return An Optional containing the redaction entity, or the previous entity if the entity already exists.
*/
public Optional<TextEntity> byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
if (!node.getTextRange().contains(textRange)) {
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
}
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
if (node.getEntities().contains(entity)) {
return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngine(Engine.RULE)).findAny();
}
addEntityToGraph(entity, node);
entity.addEngine(Engine.RULE);
insertToKieSession(entity);
return Optional.of(entity);
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findTextRangesByString(string, textBlock)
.stream()
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
/**
 * Case-insensitive variant of the line-after-string search.
 * Ranges come from {@code RedactionSearchUtility.findTextRangesByStringIgnoreCase}, are converted with
 * {@code toLineAfterTextRange}, checked with {@code isValidEntityTextRange} and then turned into entities
 * through {@code byTextRange}; empty results are skipped.
 *
 * @param string The string to search for.
 * @param type The type of the entities to create.
 * @param entityType The entity type of the entities to create.
 * @param node The semantic node to search in and to associate the entities with.
 * @return A stream of the entities returned by {@code byTextRange}.
 */
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {

    TextBlock nodeText = node.getTextBlock();
    return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, nodeText)
            .stream()
            .map(match -> toLineAfterTextRange(nodeText, match))
            .filter(candidate -> isValidEntityTextRange(nodeText, candidate))
            .map(candidate -> byTextRange(candidate, type, entityType, node))
            // Unwrap present results and skip empty ones in a single step.
            .flatMap(Optional::stream);
}
@ -600,15 +625,14 @@ public class EntityCreationService {
return newEntity;
}
/**
 * Obtains an entity of the given type and entity type for the text range of the given entity and copies
 * the manual change log and both dictionary-entry flags of the given entity onto it.
 * The entity is obtained through {@code byTextRangeWithEngine} with the engines of the given entity, so an
 * equal entity already present in the node is reused instead of being created a second time. That call
 * already adds a new entity to the graph and inserts it into the kieSession, so neither is repeated here.
 *
 * @param entity The entity to copy the text range, engines, manual changes and dictionary flags from.
 * @param type The type of the resulting entity.
 * @param entityType The entity type of the resulting entity.
 * @param node The semantic node to associate with the resulting entity.
 * @return The created or reused entity with the copied values applied.
 * @throws NotFoundException if {@code byTextRangeWithEngine} returns an empty Optional.
 * @throws IllegalArgumentException if the text range of the entity is not contained in the text range of the node.
 */
public TextEntity copyEntityWithoutRules(TextEntity entity, String type, EntityType entityType, SemanticNode node) {

    TextEntity newEntity = byTextRangeWithEngine(entity.getTextRange(), type, entityType, node, entity.getEngines())
            .orElseThrow(() -> new NotFoundException("No entity present!"));
    newEntity.getManualOverwrite().addChanges(entity.getManualOverwrite().getManualChangeLog());
    newEntity.setDictionaryEntry(entity.isDictionaryEntry());
    newEntity.setDossierDictionaryEntry(entity.isDossierDictionaryEntry());
    return newEntity;
}
@ -623,27 +647,22 @@ public class EntityCreationService {
/**
 * Creates an entity for the text range of the given NER entity, using the type reported by the NER entity,
 * and marks it with {@code Engine.NER}.
 * Delegates to {@code byTextRangeWithEngine}, so an equal entity already present in the node is reused
 * rather than created again.
 *
 * @param nerEntity The NER entity providing the text range and the type.
 * @param entityType The entity type of the resulting entity.
 * @param semanticNode The semantic node to associate with the resulting entity.
 * @return The created or reused entity.
 * @throws NotFoundException if {@code byTextRangeWithEngine} returns an empty Optional.
 * @throws IllegalArgumentException if the text range of the NER entity is not contained in the text range of the node.
 */
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {

    // byTextRangeWithEngine expects a Set<Engine>, so the single engine is wrapped.
    return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Set.of(Engine.NER))
            .orElseThrow(() -> new NotFoundException("No entity present!"));
}
/**
 * Creates an entity of the given type for the text range of the given NER entity and marks it with
 * {@code Engine.NER}. The type reported by the NER entity itself is not used.
 * Delegates to {@code byTextRangeWithEngine}, so an equal entity already present in the node is reused
 * rather than created again.
 *
 * @param nerEntity The NER entity providing the text range.
 * @param type The type of the resulting entity.
 * @param entityType The entity type of the resulting entity.
 * @param semanticNode The semantic node to associate with the resulting entity.
 * @return The created or reused entity.
 * @throws NotFoundException if {@code byTextRangeWithEngine} returns an empty Optional.
 * @throws IllegalArgumentException if the text range of the NER entity is not contained in the text range of the node.
 */
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {

    // byTextRangeWithEngine expects a Set<Engine>, so the single engine is wrapped.
    return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER))
            .orElseThrow(() -> new NotFoundException("No entity present!"));
}
/**
 * Creates entities of the given type for every boundary that
 * {@code NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults} derives from the given NER entities.
 * Each boundary is passed to {@code byTextRangeWithEngine} with {@code Engine.NER}, so equal entities already
 * present in the node are reused rather than created again; empty results are filtered out.
 * The returned stream is lazy: entities are only created once the stream is consumed.
 *
 * @param nerEntities The NER entities to combine.
 * @param type The type of the resulting entities.
 * @param entityType The entity type of the resulting entities.
 * @param semanticNode The semantic node to associate with the resulting entities.
 * @return A stream of the created or reused entities.
 */
public Stream<TextEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {

    // byTextRangeWithEngine expects a Set<Engine>, so the single engine is wrapped once up front.
    Set<Engine> nerEngine = Set.of(Engine.NER);
    return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
            .map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, nerEngine))
            .filter(Optional::isPresent)
            .map(Optional::get);
}

View File

@ -44,6 +44,13 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_components.drl");
/**
 * Per-test setup hook: calls loadDictionaryForTest() and then mockDictionaryCalls(0L).
 * NOTE(review): both helpers are defined outside this view (presumably inherited from
 * AbstractRedactionIntegrationTest); confirm there what the 0L argument stands for.
 */
@BeforeEach
public void setUpDictionaries() {
loadDictionaryForTest();
mockDictionaryCalls(0L);
}
@Test
// @Disabled
public void titleExtraction() throws IOException {
@ -55,6 +62,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
System.out.println("Finished structure analysis");
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("Finished analysis");