DM-483: endless loops with false positive
* Fix byNerEntity always creating a new entity, which made endless loops possible.
Signed-off-by: Kilian Schuettler <kilian.schuettler@knecon.com>
This commit is contained in:
parent
37627aa79e
commit
b78f6d2d0f
@ -36,6 +36,7 @@ import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -250,20 +251,66 @@ public class EntityCreationService {
|
||||
|
||||
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
|
||||
.stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary))
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()).stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary))
|
||||
.map(bounds -> byTextRange(bounds, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a redaction entity based on the given boundary, type, entity type, and semantic node.
|
||||
* If the document already contains an equal redaction entity, then the original Entity is returned.
|
||||
* Also inserts the Entity into the kieSession.
|
||||
*
|
||||
* @param textRange The boundary of the redaction entity.
|
||||
* @param type The type of the redaction entity.
|
||||
* @param entityType The entity type of the redaction entity.
|
||||
* @param node The semantic node to associate with the redaction entity.
|
||||
* @return An Optional containing the redaction entity, or the previous entity if the entity already exists.
|
||||
*/
|
||||
public Optional<TextEntity> byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byTextRangeWithEngine(textRange, type, entityType, node, Set.of(Engine.RULE));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a redaction entity based on the given boundary, type, entity type, and semantic node.
|
||||
* If the document already contains an equal redaction entity, then the original Entity is returned.
|
||||
* Also inserts the Entity into the kieSession.
|
||||
*
|
||||
* @param textRange The boundary of the redaction entity.
|
||||
* @param type The type of the redaction entity.
|
||||
* @param entityType The entity type of the redaction entity.
|
||||
* @param node The semantic node to associate with the redaction entity.
|
||||
* @return An Optional containing the redaction entity, or the previous entity if the entity already exists.
|
||||
*/
|
||||
public Optional<TextEntity> byTextRangeWithEngine(TextRange textRange, String type, EntityType entityType, SemanticNode node, Set<Engine> engines) {
|
||||
|
||||
if (!node.getTextRange().contains(textRange)) {
|
||||
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
|
||||
}
|
||||
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
|
||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
|
||||
if (node.getEntities().contains(entity)) {
|
||||
return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngines(engines)).findAny();
|
||||
}
|
||||
addEntityToGraph(entity, node);
|
||||
entity.addEngines(engines);
|
||||
insertToKieSession(entity);
|
||||
return Optional.of(entity);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
SearchImplementation searchImplementation = new SearchImplementation(strings, false);
|
||||
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
|
||||
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.map(boundary -> byTextRange(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
@ -275,29 +322,9 @@ public class EntityCreationService {
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
SearchImplementation searchImplementation = new SearchImplementation(strings, true);
|
||||
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
|
||||
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.map(boundary -> byTextRange(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
return RedactionSearchUtility.findTextRangesByString(string, textBlock)
|
||||
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.map(boundary -> byTextRange(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, textBlock)
|
||||
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.map(boundary -> byTextRange(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
@ -520,31 +547,29 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a redaction entity based on the given boundary, type, entity type, and semantic node.
|
||||
* If the document already contains an equal redaction entity, then the original Entity is returned.
|
||||
* Also inserts the Entity into the kieSession.
|
||||
*
|
||||
* @param textRange The boundary of the redaction entity.
|
||||
* @param type The type of the redaction entity.
|
||||
* @param entityType The entity type of the redaction entity.
|
||||
* @param node The semantic node to associate with the redaction entity.
|
||||
* @return An Optional containing the redaction entity, or the previous entity if the entity already exists.
|
||||
*/
|
||||
public Optional<TextEntity> byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (!node.getTextRange().contains(textRange)) {
|
||||
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
|
||||
}
|
||||
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
|
||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
|
||||
if (node.getEntities().contains(entity)) {
|
||||
return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngine(Engine.RULE)).findAny();
|
||||
}
|
||||
addEntityToGraph(entity, node);
|
||||
entity.addEngine(Engine.RULE);
|
||||
insertToKieSession(entity);
|
||||
return Optional.of(entity);
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
return RedactionSearchUtility.findTextRangesByString(string, textBlock)
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.map(boundary -> byTextRange(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, textBlock)
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
|
||||
.map(boundary -> byTextRange(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -600,15 +625,14 @@ public class EntityCreationService {
|
||||
return newEntity;
|
||||
}
|
||||
|
||||
|
||||
public TextEntity copyEntityWithoutRules(TextEntity entity, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextEntity newEntity = TextEntity.initialEntityNode(entity.getTextRange(), type, entityType);
|
||||
newEntity.addEngines(entity.getEngines());
|
||||
TextEntity newEntity = byTextRangeWithEngine(entity.getTextRange(), type, entityType, node, entity.getEngines()).orElseThrow(() -> new NotFoundException(
|
||||
"No entity present!"));
|
||||
newEntity.getManualOverwrite().addChanges(entity.getManualOverwrite().getManualChangeLog());
|
||||
newEntity.setDictionaryEntry(entity.isDictionaryEntry());
|
||||
newEntity.setDossierDictionaryEntry(entity.isDossierDictionaryEntry());
|
||||
addEntityToGraph(newEntity, node);
|
||||
insertToKieSession(newEntity);
|
||||
return newEntity;
|
||||
}
|
||||
|
||||
@ -623,27 +647,22 @@ public class EntityCreationService {
|
||||
|
||||
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
var entity = forceByTextRange(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode);
|
||||
entity.addEngine(Engine.NER);
|
||||
insertToKieSession(entity);
|
||||
return entity;
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Engine.NER).orElseThrow(() -> new NotFoundException("No entity present!"));
|
||||
}
|
||||
|
||||
|
||||
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
var entity = forceByTextRange(nerEntity.textRange(), type, entityType, semanticNode);
|
||||
entity.addEngine(Engine.NER);
|
||||
insertToKieSession(entity);
|
||||
return entity;
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Engine.NER).orElseThrow(() -> new NotFoundException("No entity present!"));
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).map(boundary -> forceByTextRange(boundary, type, entityType, semanticNode))
|
||||
.peek(entity -> entity.addEngine(Engine.NER))
|
||||
.peek(this::insertToKieSession);
|
||||
return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, Engine.NER))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -44,6 +44,13 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_components.drl");
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void setUpDictionaries() {
|
||||
|
||||
loadDictionaryForTest();
|
||||
mockDictionaryCalls(0L);
|
||||
}
|
||||
|
||||
@Test
|
||||
// @Disabled
|
||||
public void titleExtraction() throws IOException {
|
||||
@ -55,6 +62,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||
|
||||
System.out.println("Finished structure analysis");
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
System.out.println("Finished analysis");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user