From a26b97e89005f9bf4fde6d07dadb4609e6b7588f Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Wed, 5 Jun 2024 15:45:13 +0300 Subject: [PATCH] RED-9221 - Fix component extraction in tables where rows and columns are not 1:1 --- .../redaction-service-api-v1/build.gradle.kts | 3 +- .../build.gradle.kts | 2 +- .../v1/server/model/component/Entity.java | 6 +- .../server/model/document/DocumentTree.java | 36 +++++ .../service/EntityLogCreatorService.java | 5 + .../ComponentLogCreatorService.java | 3 +- .../document/ComponentCreationService.java | 110 ++------------ .../document/EntityCreationService.java | 4 +- .../ComponentDroolsExecutionService.java | 14 +- .../server/utils/ComponentCreationUtils.java | 136 ++++++++++++++++++ .../v1/server/utils/QuintConsumer.java | 10 ++ .../v1/server/DocumineFloraTest.java | 23 +++ .../src/test/resources/files/syngenta | 2 +- 13 files changed, 246 insertions(+), 108 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/ComponentCreationUtils.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/QuintConsumer.java diff --git a/redaction-service-v1/redaction-service-api-v1/build.gradle.kts b/redaction-service-v1/redaction-service-api-v1/build.gradle.kts index 5a476837..97713b61 100644 --- a/redaction-service-v1/redaction-service-api-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-api-v1/build.gradle.kts @@ -4,10 +4,11 @@ plugins { } description = "redaction-service-api-v1" +val persistenceServiceVersion = "2.439.0" dependencies { implementation("org.springframework:spring-web:6.0.12") - implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.411.0") + implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") } publishing { diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 4aa1625e..6fe299f9 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -16,7 +16,7 @@ val layoutParserVersion = "0.131.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.429.0" +val persistenceServiceVersion = "2.439.0" val springBootStarterVersion = "3.1.5" val springCloudVersion = "4.0.4" val testContainersVersion = "1.19.7" diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java index ca4feee7..e6e539ec 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java @@ -68,7 +68,7 @@ public class Entity { Set importedRedactionIntersections; - public static Entity fromEntityLogEntry(EntityLogEntry e, Document document) { + public static Entity fromEntityLogEntry(EntityLogEntry e, Document document, int startOffset, int endOffset) { return Entity.builder() .id(e.getId()) @@ -86,8 +86,8 @@ public class Entity { .containingNode(document.getDocumentTree().getEntryById(e.getContainingNodeId()).getNode()) .textBefore(e.getTextBefore()) .textAfter(e.getTextAfter()) - .startOffset(e.getStartOffset()) - .endOffset(e.getEndOffset()) + .startOffset(startOffset) + .endOffset(endOffset) .length(Optional.ofNullable(e.getValue()) .orElse("").length()) .imageHasTransparency(e.isImageHasTransparency()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java index 8527cff0..e6c28550 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java @@ -166,6 +166,42 @@ public class DocumentTree { } + public Optional findTableCellInTable(List treeId, int start, int end) { + + return findTableCellInTableRecursively(getEntryById(treeId).getChildren(), start, end); + } + + + private Optional findTableCellInTableRecursively(List entries, int start, int end) { + + int startIdx = findFirstIdxOfContainingChildBinarySearch(entries, start); + if (startIdx < 0) { + return Optional.empty(); + } + + Entry entry = entries.get(startIdx); + + if (entry.getNode().getTextRange().contains(end) && entry.getNode() instanceof TableCell tableCell) { + if (!entry.getNode().isLeaf()) { + Optional foundInChildren = findTableCellInTableRecursively(entry.getChildren(), start, end); + if (foundInChildren.isPresent()) { + return foundInChildren; + } + } + return Optional.of(tableCell); + } + + if (!entry.getNode().isLeaf()) { + Optional foundInChildren = findTableCellInTableRecursively(entry.getChildren(), start, end); + if (foundInChildren.isPresent()) { + return foundInChildren; + } + } + + return Optional.empty(); + } + + private int findFirstIdxOfContainingChildBinarySearch(List childNodes, int start) { int low = 0; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index 3dcfd6cf..b237c2ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -12,6 +12,7 @@ import java.util.stream.Collectors; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.DuplicatedTextRange; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges; @@ -286,6 +287,10 @@ public class EntityLogCreatorService { .textBefore(entity.getTextBefore()) .startOffset(entity.getTextRange().start()) .endOffset(entity.getTextRange().end()) + .duplicatedTextRanges(entity.getDuplicateTextRanges() + .stream() + .map(textRange -> DuplicatedTextRange.builder().start(textRange.start()).end(textRange.end()).build()) + .toList()) .dossierDictionaryEntry(entity.isDossierDictionaryEntry()) .engines(getEngines(entity.getEngines(), entity.getManualOverwrite())) //imported is no longer used, frontend should check engines diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentLogCreatorService.java index 54af5c5f..7815ef44 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/components/ComponentLogCreatorService.java @@ -31,7 +31,7 @@ public class ComponentLogCreatorService { }); List componentLogComponents = map.entrySet() .stream() - .map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue())) + .map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue(), false)) .toList(); return new ComponentLog(analysisNumber, componentRulesVersion, componentLogComponents); } @@ -41,7 +41,6 @@ public class ComponentLogCreatorService { return ComponentLogEntryValue.builder() .value(component.getValue()) - .originalValue(component.getValue()) .componentRuleId(component.getMatchedRule().toString()) .valueDescription(component.getValueDescription()) .componentLogEntityReferences(toComponentEntityReferences(component.getReferences() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java index d5f04f6f..b42ce562 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java @@ -1,5 +1,11 @@ package com.iqser.red.service.redaction.v1.server.service.document; +import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.findEntitiesFromFirstSection; +import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.findEntitiesFromLongestSection; +import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.getFirstTableCell; +import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.joinEntitiesOnSameRow; +import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.joinTypes; + import java.text.BreakIterator; import java.util.Collection; import java.util.Collections; @@ -9,7 +15,6 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -19,10 +24,9 @@ import org.kie.api.runtime.KieSession; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.component.Entity; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier; +import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils; import com.iqser.red.service.redaction.v1.server.utils.DateConverter; import lombok.AccessLevel; @@ -37,24 +41,6 @@ public class ComponentCreationService { Set referencedEntities = new HashSet<>(); - private static List findEntitiesFromLongestSection(Collection entities) { - - var entitiesBySection = entities.stream() - .collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent())); - Optional longestSection = entitiesBySection.entrySet() - .stream() - .sorted(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities).reversed()) - .map(Map.Entry::getKey) - .findFirst(); - - if (longestSection.isEmpty()) { - return Collections.emptyList(); - } - - return entitiesBySection.get(longestSection.get()); - } - - /** * Joins entity values, and creates a component from the result. * @@ -87,15 +73,6 @@ public class ComponentCreationService { } - private static String joinTypes(Collection entities) { - - return entities.stream() - .map(Entity::getType) - .distinct() - .collect(Collectors.joining(", ")); - } - - /** * Creates a new component with the given parameters and inserts it into the kieSession. * @@ -146,20 +123,6 @@ public class ComponentCreationService { } - private static List findEntitiesFromFirstSection(Collection entities) { - - var entitiesBySection = entities.stream() - .collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent())); - Optional firstSection = entitiesBySection.keySet() - .stream() - .min(SemanticNodeComparators.first()); - if (firstSection.isEmpty()) { - return Collections.emptyList(); - } - return entitiesBySection.get(firstSection.get()); - } - - /** * Joins unique entity values from the first section entities appear in, and creates a component from the result. * @@ -252,14 +215,6 @@ public class ComponentCreationService { } - private static int getTotalLengthOfEntities(Map.Entry> entry) { - - return entry.getValue() - .stream() - .mapToInt(Entity::getLength).sum(); - } - - /** * Joins unique entity values with delimiter ', ' from the section with the longest combined entity values only, and creates a component from the result. * @@ -329,7 +284,7 @@ public class ComponentCreationService { public void rowValueCount(String ruleIdentifier, String name, Collection entities) { entities.stream() - .collect(Collectors.groupingBy(this::getFirstTable)) + .collect(Collectors.groupingBy(ComponentCreationUtils::getFirstTable)) .forEach((optionalTable, groupedEntities) -> { if (optionalTable.isEmpty()) { @@ -491,60 +446,21 @@ public class ComponentCreationService { .sorted(Comparator.reverseOrder()) .distinct() .collect(Collectors.joining(", ")); + String valueDescription = String.format("Combine values of %s that are in same table row", types); + entities.stream() - .collect(Collectors.groupingBy(this::getFirstTable)) + .collect(Collectors.groupingBy(ComponentCreationUtils::getFirstTable)) .forEach((optionalTable, groupedEntities) -> { if (optionalTable.isEmpty()) { groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity)); + return; } - - groupedEntities.stream() - .filter(entity -> entity.getContainingNode() instanceof TableCell) - .collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow())).entrySet() - .stream() - .sorted(Comparator.comparingInt(Map.Entry::getKey)) - .map(Map.Entry::getValue) - .forEach(entitiesInSameRow -> create(ruleIdentifier, - name, - entitiesInSameRow.stream() - .sorted(EntityComparators.first()) - .map(Entity::getValue) - .collect(Collectors.joining(delimiter)), - valueDescription, - entitiesInSameRow)); + joinEntitiesOnSameRow(ruleIdentifier, name, groupedEntities, valueDescription, delimiter, this::create); }); } - private Optional getFirstTable(Entity entity) { - - SemanticNode node = entity.getContainingNode(); - while (!(node instanceof Table)) { - if (!node.hasParent()) { - return Optional.empty(); - } - node = node.getParent(); - } - - return Optional.of((Table) node); - } - - - private Optional getFirstTableCell(Entity entity) { - - SemanticNode node = entity.getContainingNode(); - while (!(node instanceof TableCell)) { - if (!node.hasParent()) { - return Optional.empty(); - } - node = node.getParent(); - } - - return Optional.of((TableCell) node); - } - - /** * Creates a new component with the given rule identifier, name, value, and value description. * If the component is part of a table, it also takes a list of entities that belong to the same table row. diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index cd58bbe7..6f758b2d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -979,8 +979,8 @@ public class EntityCreationService { return Optional.empty(); } TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node); - if (node.getEntities().contains(entity)) { - Optional optionalTextEntity = node.getEntities() + if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) { + Optional optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities() .stream() .filter(e -> e.equals(entity) && e.type().equals(type)) .peek(e -> e.addEngines(engines)) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index a0fa79dd..1a0ef293 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.service.drools; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.Set; @@ -67,8 +68,19 @@ public class ComponentDroolsExecutionService { entityLog.getEntityLogEntry() .stream() .filter(this::isApplied) - .map(entry -> Entity.fromEntityLogEntry(entry, document)) + .filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED)) + .flatMap(entry -> { + List entities = new ArrayList<>(); + entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset())); + if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) { + entry.getDuplicatedTextRanges().forEach(duplicatedTextRange -> { + entities.add(Entity.fromEntityLogEntry(entry, document, duplicatedTextRange.getStart(), duplicatedTextRange.getEnd())); + }); + } + return entities.stream(); + }) .forEach(kieSession::insert); + fileAttributes.stream() .filter(f -> f.getValue() != null) .forEach(kieSession::insert); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/ComponentCreationUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/ComponentCreationUtils.java new file mode 100644 index 00000000..674c88f0 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/ComponentCreationUtils.java @@ -0,0 +1,136 @@ +package com.iqser.red.service.redaction.v1.server.utils; + +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +import com.iqser.red.service.redaction.v1.server.model.component.Entity; +import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; +import com.iqser.red.service.redaction.v1.server.service.document.EntityComparators; +import com.iqser.red.service.redaction.v1.server.service.document.SemanticNodeComparators; + +public class ComponentCreationUtils { + + public static TableCell getFirstTableCell(Collection uniqueEntities) { + + return (TableCell) uniqueEntities.stream() + .findFirst() + .orElseThrow(() -> new IllegalArgumentException("No entities found")).getContainingNode(); + } + + + public static String joinTypes(Collection entities) { + + return entities.stream() + .map(Entity::getType) + .distinct() + .collect(Collectors.joining(", ")); + } + + + public static List findEntitiesFromLongestSection(Collection entities) { + + var entitiesBySection = entities.stream() + .collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent())); + Optional longestSection = entitiesBySection.entrySet() + .stream() + .sorted(Comparator.comparingInt(ComponentCreationUtils::getTotalLengthOfEntities).reversed()) + .map(Map.Entry::getKey) + .findFirst(); + + if (longestSection.isEmpty()) { + return Collections.emptyList(); + } + + return entitiesBySection.get(longestSection.get()); + } + + + public static List findEntitiesFromFirstSection(Collection entities) { + + var entitiesBySection = entities.stream() + .collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent())); + Optional firstSection = entitiesBySection.keySet() + .stream() + .min(SemanticNodeComparators.first()); + if (firstSection.isEmpty()) { + return Collections.emptyList(); + } + return entitiesBySection.get(firstSection.get()); + } + + + public static Optional getFirstTableCell(Entity entity) { + + SemanticNode node = entity.getContainingNode(); + while (!(node instanceof TableCell)) { + if (!node.hasParent()) { + return Optional.empty(); + } + node = node.getParent(); + } + + return Optional.of((TableCell) node); + } + + + public static Optional
getFirstTable(Entity entity) { + + SemanticNode node = entity.getContainingNode(); + while (!(node instanceof Table)) { + if (!node.hasParent()) { + return Optional.empty(); + } + node = node.getParent(); + } + + return Optional.of((Table) node); + } + + + public static void joinEntitiesOnSameRow(String ruleIdentifier, String name, List groupedEntities, String valueDescription, String delimiter, QuintConsumer> create) { + + groupedEntities.stream() + .filter(entity -> entity.getContainingNode() instanceof TableCell || entity.getContainingNode() instanceof Table) + .collect(Collectors.groupingBy(entity -> { + if (entity.getContainingNode() instanceof TableCell) { + return ((TableCell) entity.getContainingNode()).getRow(); + } else { + DocumentTree documentTree = entity.getContainingNode().getDocumentTree(); + Optional tableCell = documentTree.findTableCellInTable(entity.getContainingNode().getTreeId(), + entity.getStartOffset(), + entity.getEndOffset()); + return tableCell.map(TableCell::getRow) + .orElse(0); + + } + })).entrySet() + .stream() + .sorted(Comparator.comparingInt(Map.Entry::getKey)) + .map(Map.Entry::getValue) + .forEach(entitiesInSameRow -> create.accept(ruleIdentifier, + name, + entitiesInSameRow.stream() + .sorted(EntityComparators.first()) + .map(Entity::getValue) + .collect(Collectors.joining(delimiter)), + valueDescription, + entitiesInSameRow)); + } + + + public static int getTotalLengthOfEntities(Map.Entry> entry) { + + return entry.getValue() + .stream() + .mapToInt(Entity::getLength).sum(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/QuintConsumer.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/QuintConsumer.java new file mode 100644 index 00000000..fe4423e0 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/QuintConsumer.java @@ -0,0 +1,10 @@ +package com.iqser.red.service.redaction.v1.server.utils; + +import java.util.Collection; + +@FunctionalInterface +public interface QuintConsumer> { + + void accept(T t, U u, V v, W w, X x); + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index b6127b05..739c6c30 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -11,6 +11,7 @@ import java.time.OffsetDateTime; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.regex.Pattern; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; @@ -32,6 +33,7 @@ import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLogEntryValue; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; @@ -286,4 +288,25 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { .get(0).getValue()); } + + @Test + public void testDoseMortalityExtraction() { + + AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf"); + + System.out.println("Start Full integration test"); + analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request); + System.out.println("Finished structure analysis"); + analyzeService.analyze(request); + System.out.println("Finished analysis"); + + var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID); + var doseMortality = componentLog.getComponentLogEntries().stream().filter(componentLogEntry -> componentLogEntry.getName().equals("Dose_Mortality")).findFirst().get(); + + assertEquals(doseMortality.getComponentValues().size(), 5); + + Pattern pattern = Pattern.compile("^5000, [SD]$"); + boolean allMatch = doseMortality.getComponentValues().stream().map(ComponentLogEntryValue::getValue).allMatch(pattern.asPredicate()); + assertTrue(allMatch); + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/syngenta b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/syngenta index 21fefb64..5705cc07 160000 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/syngenta +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/syngenta @@ -1 +1 @@ -Subproject commit 21fefb64bf27ca2b3329a6c69d90a27450b17930 +Subproject commit 5705cc0782605fdca5dfff134b436f7143c9e421 -- 2.47.2