From 0d53edba32d56a8808bc77c68d52ca936e3e4c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Wed, 5 Jul 2023 21:50:45 +0200 Subject: [PATCH] RED-6929: fix acceptance tests/rules --- .../adapter/RedactionLogEntryAdapter.java | 6 +- .../parsing/PDFTextStripper.java | 3 + .../factory/DocumentGraphFactory.java | 9 +- .../document/factory/SectionNodeFactory.java | 15 +- .../document/factory/TableNodeFactory.java | 10 +- .../document/factory/TextBlockFactory.java | 6 +- .../document/graph/entity/MatchedRule.java | 39 +- .../graph/entity/MatchedRuleHolder.java | 104 +++ .../graph/entity/RedactionEntity.java | 91 +-- .../document/graph/nodes/Document.java | 9 + .../document/graph/nodes/Footer.java | 10 + .../document/graph/nodes/Header.java | 10 + .../document/graph/nodes/Headline.java | 15 +- .../document/graph/nodes/Image.java | 70 +- .../document/graph/nodes/Section.java | 17 +- .../graph/nodes/SectionIdentifier.java | 123 +++ .../document/graph/nodes/SemanticNode.java | 11 + .../graph/textblock/AtomicTextBlock.java | 34 + .../textblock/ConcatenatedTextBlock.java | 28 + .../document/graph/textblock/TextBlock.java | 9 + .../services/EntityCreationService.java | 112 ++- .../utils/RectangleTransformations.java | 9 +- .../utils/RedactionSearchUtility.java | 25 +- .../redaction/service/AnalyzeService.java | 38 +- .../redaction/service/DictionaryService.java | 47 +- .../service/DroolsExecutionService.java | 2 +- ...ManualRedactionSurroundingTextService.java | 2 +- .../service/RedactionLogCreatorService.java | 22 +- .../service/SectionFinderService.java | 2 +- .../v1/server/redaction/utils/Patterns.java | 4 +- .../v1/server/RedactionAcceptanceTest.java | 163 ++++ .../document/entity/RedactionEntityTest.java | 6 +- ...ocumentEntityInsertionIntegrationTest.java | 11 + .../ManualResizeRedactionIntegrationTest.java | 2 +- .../SectionIdentifierTest.java | 58 ++ .../graph/textblock/AtomicTextBlockTest.java | 93 +++ 
.../utils/RectangleTransformationsTest.java | 90 ++ .../adapter/NerEntitiesAdapterTest.java | 7 + .../resources/dictionaries/CBI_author.txt | 1 + .../dictionaries/published_information.txt | 1 + .../resources/drools/acceptance_rules.drl | 772 ++++++++++++++++++ .../src/test/resources/drools/all_rules.drl | 115 ++- .../test/resources/drools/documine_flora.drl | 91 ++- .../src/test/resources/drools/rules.drl | 93 ++- .../src/test/resources/drools/rules_v2.drl | 95 ++- 45 files changed, 2083 insertions(+), 397 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRuleHolder.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SectionIdentifier.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/sectionidentifiers/SectionIdentifierTest.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlockTest.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformationsTest.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java index d1846ae0..791c8f81 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java @@ -9,6 +9,7 @@ import java.util.Comparator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -72,6 +73,8 @@ public class RedactionLogEntryAdapter { return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary()) .stream() .map(boundary -> entityCreationService.byBoundary(boundary, "temp", EntityType.ENTITY, node)) + .filter(Optional::isPresent) + .map(Optional::get) .collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT))); } @@ -100,8 +103,7 @@ public class RedactionLogEntryAdapter { RedactionEntity correctEntity = entityCreationService.byBoundary(closestEntity.getBoundary(), redactionLogEntry.getType(), redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY, - node); - + node).orElseThrow(); String ruleIdentifier = redactionLogEntry.getType() + "." 
+ redactionLogEntry.getMatchedRule() + ".0"; if (redactionLogEntry.isRedacted()) { correctEntity.apply(ruleIdentifier, redactionLogEntry.getReason(), redactionLogEntry.getLegalBasis()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/parsing/PDFTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/parsing/PDFTextStripper.java index 18be3d0e..48dc580a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/parsing/PDFTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/parsing/PDFTextStripper.java @@ -51,6 +51,8 @@ import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.text.TextPositionComparator; import org.apache.pdfbox.util.QuickSort; +import com.iqser.red.service.persistence.service.v1.api.shared.model.utils.SuppressFBWarnings; + /** * This is just a copy except i only adjusted lines 594-607 cause this is a bug in Pdfbox. 
* see S416.pdf @@ -1737,6 +1739,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine { } + @SuppressFBWarnings private static Map MIRRORING_CHAR_MAP = new HashMap<>(); static { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/DocumentGraphFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/DocumentGraphFactory.java index 1b64dee2..f517a93a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/DocumentGraphFactory.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/DocumentGraphFactory.java @@ -83,7 +83,7 @@ public class DocumentGraphFactory { List textBlocks = new ArrayList<>(textBlocksToMerge); textBlocks.add(originalTextBlock); - AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page); + AtomicTextBlock textBlock = context.textBlockFactory.fromContext(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page); List treeId = context.documentTree.createNewChildEntryAndReturnId(parentNode, node); node.setLeafTextBlock(textBlock); node.setTreeId(treeId); @@ -145,10 +145,7 @@ public class DocumentGraphFactory { Page page = context.getPage(textBlocks.get(0).getPage()); Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build(); - AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), - footer, - context, - page); + AtomicTextBlock textBlock = 
context.textBlockFactory.fromContext(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), footer, context, page); List tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer); footer.setTreeId(tocId); footer.setLeafTextBlock(textBlock); @@ -160,7 +157,7 @@ public class DocumentGraphFactory { Page page = context.getPage(textBlocks.get(0).getPage()); Header header = Header.builder().documentTree(context.getDocumentTree()).build(); - AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page); + AtomicTextBlock textBlock = context.textBlockFactory.fromNumberOnPage(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page); List tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header); header.setTreeId(tocId); header.setLeafTextBlock(textBlock); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java index eabe4ecc..d162304d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/SectionNodeFactory.java @@ -80,7 +80,7 @@ public class SectionNodeFactory { remainingBlocks.removeAll(alreadyMerged); if (abstractPageBlock instanceof TextPageBlock) { - List textBlocks = findTextBlocksWithSameClassificationAndAlignsY(abstractPageBlock, remainingBlocks); + List textBlocks = 
findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(abstractPageBlock, remainingBlocks); alreadyMerged.addAll(textBlocks); DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, textBlocks); } else if (abstractPageBlock instanceof TablePageBlock tablePageBlock) { @@ -162,14 +162,15 @@ public class SectionNodeFactory { } - private List findTextBlocksWithSameClassificationAndAlignsY(AbstractPageBlock atc, List pageBlocks) { + private List findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(AbstractPageBlock atc, List pageBlocks) { return pageBlocks.stream() - .filter(abstractTextContainer -> !abstractTextContainer.equals(atc)) - .filter(abstractTextContainer -> abstractTextContainer.getPage() == atc.getPage()) - .filter(abstractTextContainer -> abstractTextContainer instanceof TextPageBlock) - .filter(abstractTextContainer -> abstractTextContainer.intersectsY(atc)) - .map(abstractTextContainer -> (TextPageBlock) abstractTextContainer) + .filter(abstractPageBlock -> !abstractPageBlock.equals(atc)) + .filter(abstractPageBlock -> abstractPageBlock.getPage() == atc.getPage()) + .filter(abstractPageBlock -> abstractPageBlock.getOrientation().equals(atc.getOrientation())) + .filter(abstractPageBlock -> abstractPageBlock.intersectsY(atc)) + .filter(abstractPageBlock -> abstractPageBlock instanceof TextPageBlock) + .map(abstractPageBlock -> (TextPageBlock) abstractPageBlock) .toList(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TableNodeFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TableNodeFactory.java index a1a3661e..064db8ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TableNodeFactory.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TableNodeFactory.java @@ -33,7 +33,11 @@ public class TableNodeFactory { Set pages = tablesToMerge.stream().map(AbstractPageBlock::getPage).map(context::getPage).collect(Collectors.toSet()); List> mergedRows = tablesToMerge.stream().map(TablePageBlock::getRows).flatMap(Collection::stream).toList(); - Table table = Table.builder().documentTree(context.getDocumentTree()).numberOfCols(mergedRows.isEmpty() ? 0 :mergedRows.get(0).size()).numberOfRows(mergedRows.size()).build(); + Table table = Table.builder() + .documentTree(context.getDocumentTree()) + .numberOfCols(mergedRows.isEmpty() ? 0 : mergedRows.get(0).size()) + .numberOfRows(mergedRows.size()) + .build(); pages.forEach(page -> addTableToPage(page, parentNode, table)); @@ -109,13 +113,13 @@ public class TableNodeFactory { if (cell.getTextBlocks().isEmpty()) { tableCell.setLeafTextBlock(context.getTextBlockFactory().emptyTextBlock(tableNode, context, page)); } else if (cell.getTextBlocks().size() == 1) { - textBlock = context.getTextBlockFactory().buildAtomicTextBlock(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page); + textBlock = context.getTextBlockFactory().fromContext(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page); tableCell.setLeafTextBlock(textBlock); } else if (firstTextBlockIsHeadline(cell)) { SectionNodeFactory.addSection(tableCell, cell.getTextBlocks().stream().map(tb -> (AbstractPageBlock) tb).toList(), emptyList(), context); } else if (cellAreaIsSmallerThanPageAreaTimesThreshold(cell, page)) { List sequences = TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(cell.getTextBlocks()); - textBlock = context.getTextBlockFactory().buildAtomicTextBlock(sequences, tableCell, context, page); + textBlock = context.getTextBlockFactory().fromContext(sequences, tableCell, context, page); 
tableCell.setLeafTextBlock(textBlock); } else { cell.getTextBlocks().forEach(tb -> DocumentGraphFactory.addParagraphOrHeadline(tableCell, tb, context, emptyList())); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TextBlockFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TextBlockFactory.java index a1eef095..12c0157f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TextBlockFactory.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/factory/TextBlockFactory.java @@ -17,14 +17,14 @@ public class TextBlockFactory { long textBlockIdx; - public AtomicTextBlock buildAtomicTextBlock(List sequences, SemanticNode parent, DocumentGraphFactory.Context context, Page page) { + public AtomicTextBlock fromContext(List sequences, SemanticNode parent, DocumentGraphFactory.Context context, Page page) { Integer numberOnPage = context.getAndIncrementTextBlockNumberOnPage(page); - return buildAtomicTextBlock(sequences, parent, numberOnPage, page); + return fromNumberOnPage(sequences, parent, numberOnPage, page); } - public AtomicTextBlock buildAtomicTextBlock(List sequences, SemanticNode parent, Integer numberOnPage, Page page) { + public AtomicTextBlock fromNumberOnPage(List sequences, SemanticNode parent, Integer numberOnPage, Page page) { SearchTextWithTextPositionDto searchTextWithTextPositionDto = SearchTextWithTextPositionFactory.buildSearchTextToTextPositionModel(sequences); int offset = stringOffset; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java index 4a44e00a..b043e490 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java @@ -4,18 +4,42 @@ import java.util.Collections; import java.util.Objects; import java.util.Set; -public record MatchedRule(RuleIdentifier ruleIdentifier, String reason, String legalBasis, boolean applied, Set references) implements Comparable { +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.experimental.FieldDefaults; + +@Getter +@Builder +@AllArgsConstructor +@EqualsAndHashCode +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public final class MatchedRule implements Comparable { + + @Builder.Default + RuleIdentifier ruleIdentifier = RuleIdentifier.empty(); + @Builder.Default + String reason = ""; + @Builder.Default + String legalBasis = ""; + boolean applied; + boolean writeValueWithLineBreaks; + @Builder.Default + Set references = Collections.emptySet(); + public static MatchedRule empty() { - return new MatchedRule(RuleIdentifier.empty(), "", "", false, Collections.emptySet()); + return MatchedRule.builder().build(); } @Override public int compareTo(MatchedRule matchedRule) { - RuleIdentifier otherRuleIdentifier = matchedRule.ruleIdentifier(); + RuleIdentifier otherRuleIdentifier = matchedRule.getRuleIdentifier(); if (!Objects.equals(ruleIdentifier.type(), otherRuleIdentifier.type())) { if (Objects.equals(otherRuleIdentifier.type(), "MAN")) { return 1; @@ -24,10 +48,17 @@ public record MatchedRule(RuleIdentifier 
ruleIdentifier, String reason, String l return -1; } } - if (!Objects.equals(otherRuleIdentifier.unit(), ruleIdentifier().unit())) { + if (!Objects.equals(otherRuleIdentifier.unit(), getRuleIdentifier().unit())) { return otherRuleIdentifier.unit() - ruleIdentifier.unit(); } return otherRuleIdentifier.id() - ruleIdentifier.id(); } + + @Override + public String toString() { + + return "MatchedRule[" + "ruleIdentifier=" + ruleIdentifier + ", " + "reason=" + reason + ", " + "legalBasis=" + legalBasis + ", " + "applied=" + applied + ", " + "writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", " + "references=" + references + ']'; + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRuleHolder.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRuleHolder.java new file mode 100644 index 00000000..3da9d1b8 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRuleHolder.java @@ -0,0 +1,104 @@ +package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity; + +import java.util.Collection; +import java.util.HashSet; +import java.util.PriorityQueue; +import java.util.Set; + +import lombok.NonNull; + +public interface MatchedRuleHolder { + + PriorityQueue getMatchedRuleList(); + + + default boolean isApplied() { + + return getMatchedRule().isApplied(); + } + + + default Set getReferences() { + + return getMatchedRule().getReferences(); + } + + + default void apply(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) { + + if (legalBasis.isBlank() || legalBasis.isEmpty()) { + throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); + } + 
getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).legalBasis(legalBasis).applied(true).build()); + } + + + default void applyWithLineBreaks(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) { + + if (legalBasis.isBlank() || legalBasis.isEmpty()) { + throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); + } + getMatchedRuleList().add(MatchedRule.builder() + .ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)) + .reason(reason) + .legalBasis(legalBasis) + .applied(true) + .writeValueWithLineBreaks(true) + .build()); + } + + + default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection references) { + + if (legalBasis.isBlank() || legalBasis.isEmpty()) { + throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); + } + getMatchedRuleList().add(MatchedRule.builder() + .ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)) + .reason(reason) + .legalBasis(legalBasis) + .applied(true) + .references(new HashSet<>(references)) + .build()); + } + + + default void skip(@NonNull String ruleIdentifier, String reason) { + + getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build()); + } + + + default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection references) { + + getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).references(new HashSet<>(references)).build()); + } + + + default void addMatchedRule(MatchedRule matchedRule) { + + getMatchedRuleList().add(matchedRule); + } + + + default void addMatchedRules(Collection matchedRules) { + + getMatchedRuleList().addAll(matchedRules); + } + + + default int getMatchedRuleUnit() { + + return getMatchedRule().getRuleIdentifier().unit(); 
+ } + + + default MatchedRule getMatchedRule() { + + if (getMatchedRuleList().isEmpty()) { + return MatchedRule.empty(); + } + return getMatchedRuleList().peek(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java index 4fa0f78f..b6d8a282 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java @@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.e import java.awt.geom.Rectangle2D; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedList; @@ -22,7 +21,6 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NonNull; import lombok.experimental.FieldDefaults; @Data @@ -30,7 +28,7 @@ import lombok.experimental.FieldDefaults; @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) @EqualsAndHashCode(onlyExplicitlyIncluded = true) -public class RedactionEntity { +public class RedactionEntity implements MatchedRuleHolder { // initial values @EqualsAndHashCode.Include @@ -55,7 +53,6 @@ public class RedactionEntity { PriorityQueue matchedRuleList = new PriorityQueue<>(); // inferred on graph insertion - @EqualsAndHashCode.Include String value; String textBefore; String textAfter; @@ -73,18 +70,6 @@ public class RedactionEntity { } - public boolean isApplied() { - - return 
getMatchedRule().applied(); - } - - - public Set getReferences() { - - return getMatchedRule().references(); - } - - public boolean occursInNodeOfType(Class clazz) { return intersectingNodes.stream().anyMatch(clazz::isInstance); @@ -121,6 +106,12 @@ public class RedactionEntity { } + public String getValueWithLineBreaks() { + + return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getBoundary()); + } + + public void removeFromGraph() { intersectingNodes.forEach(node -> node.getEntities().remove(this)); @@ -129,67 +120,21 @@ public class RedactionEntity { deepestFullyContainingNode = null; pages = new HashSet<>(); removed = true; + } + + + public void remove() { + + removed = true; + } + + + public void ignore() { + ignored = true; } - public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) { - - if (legalBasis.isBlank() || legalBasis.isEmpty()) { - throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); - } - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet())); - } - - - public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection references) { - - if (legalBasis.isBlank() || legalBasis.isEmpty()) { - throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); - } - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references))); - } - - - public void skip(@NonNull String ruleIdentifier, String comment) { - - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet())); - } - - - public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection references) { - - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), 
comment, "", false, new HashSet<>(references))); - } - - - public void addMatchedRule(MatchedRule matchedRule) { - - matchedRuleList.add(matchedRule); - } - - - public void addMatchedRules(Collection matchedRules) { - - matchedRuleList.addAll(matchedRules); - } - - - public int getMatchedRuleUnit() { - - return getMatchedRule().ruleIdentifier().unit(); - } - - - public MatchedRule getMatchedRule() { - - if (matchedRuleList.isEmpty()) { - return MatchedRule.empty(); - } - return matchedRuleList.peek(); - } - - public List getRedactionPositionsPerPage() { if (redactionPositionsPerPage == null || redactionPositionsPerPage.isEmpty()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java index 719b2edb..6007f411 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java @@ -35,6 +35,8 @@ public class Document implements GenericSemanticNode { TextBlock textBlock; @Builder.Default Set entities = new HashSet<>(); + @Builder.Default + static final SectionIdentifier sectionIdentifier = SectionIdentifier.document(); @Override @@ -79,6 +81,13 @@ public class Document implements GenericSemanticNode { } + @Override + public SectionIdentifier getSectionIdentifier() { + + return sectionIdentifier; + } + + @Override public Headline getHeadline() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Footer.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Footer.java index 42688e85..5419a070 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Footer.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Footer.java @@ -23,6 +23,9 @@ import lombok.experimental.FieldDefaults; @FieldDefaults(level = AccessLevel.PRIVATE) public class Footer implements GenericSemanticNode { + @Builder.Default + final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty(); + List treeId; TextBlock leafTextBlock; @@ -55,6 +58,13 @@ public class Footer implements GenericSemanticNode { } + @Override + public SectionIdentifier getSectionIdentifier() { + + return sectionIdentifier; + } + + @Override public String toString() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Header.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Header.java index 4b7c50d8..57e89e12 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Header.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Header.java @@ -23,6 +23,9 @@ import lombok.experimental.FieldDefaults; @FieldDefaults(level = AccessLevel.PRIVATE) public class Header implements GenericSemanticNode { + @Builder.Default + final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty(); + List treeId; TextBlock leafTextBlock; @@ -55,6 +58,13 @@ public 
class Header implements GenericSemanticNode { } + @Override + public SectionIdentifier getSectionIdentifier() { + + return sectionIdentifier; + } + + @Override public String toString() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java index a1507a77..8eae571b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java @@ -26,6 +26,7 @@ public class Headline implements GenericSemanticNode { List treeId; TextBlock leafTextBlock; + SectionIdentifier sectionIdentifier; @EqualsAndHashCode.Exclude DocumentTree documentTree; @@ -70,12 +71,24 @@ public class Headline implements GenericSemanticNode { } + @Override + public SectionIdentifier getSectionIdentifier() { + + if (sectionIdentifier == null) { + sectionIdentifier = SectionIdentifier.fromSearchText(getTextBlock().getSearchText()); + } + return sectionIdentifier; + } + + public static Headline empty() { return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build(); } - public boolean hasParagraphs(){ + + public boolean hasParagraphs() { + return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java index 
d9fad9e0..d2a9cc79 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes; import java.awt.geom.Rectangle2D; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -12,8 +11,8 @@ import java.util.Set; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRuleHolder; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RuleIdentifier; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector; @@ -23,7 +22,6 @@ import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; -import lombok.NonNull; import lombok.experimental.FieldDefaults; @Data @@ -31,7 +29,7 @@ import lombok.experimental.FieldDefaults; @AllArgsConstructor @NoArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) -public class Image implements GenericSemanticNode { +public class Image implements GenericSemanticNode, MatchedRuleHolder { List treeId; String id; @@ -40,6 +38,7 @@ public class Image implements GenericSemanticNode { boolean transparent; Rectangle2D position; + boolean removed; boolean ignored; @Builder.Default @@ 
-56,72 +55,21 @@ public class Image implements GenericSemanticNode { Set entities = new HashSet<>(); - public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) { + public boolean isActive() { - if (legalBasis.isBlank() || legalBasis.isEmpty()) { - throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); - } - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet())); + return !removed && !ignored; } - public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection references) { + public void ignore() { - if (legalBasis.isBlank() || legalBasis.isEmpty()) { - throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); - } - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references))); + ignored = true; } - public void skip(@NonNull String ruleIdentifier, String comment) { + public void remove() { - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet())); - } - - - public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection references) { - - matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, new HashSet<>(references))); - } - - - public void addMatchedRule(MatchedRule matchedRule) { - - matchedRuleList.add(matchedRule); - } - - - public void addMatchedRules(Collection matchedRules) { - - matchedRuleList.addAll(matchedRules); - } - - - public boolean isApplied() { - - return getMatchedRule().applied(); - } - - - public Set getReferences() { - - return getMatchedRule().references(); - } - - - public int getMatchedRuleUnit() { - - return getMatchedRule().ruleIdentifier().unit(); - } - - - public MatchedRule getMatchedRule() { - 
- if (matchedRuleList.isEmpty()) { - return MatchedRule.empty(); - } - return matchedRuleList.peek(); + removed = true; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java index 101b9987..04082a92 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java @@ -8,7 +8,6 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector; -import java.util.stream.Stream; import lombok.AccessLevel; import lombok.AllArgsConstructor; @@ -49,6 +48,13 @@ public class Section implements GenericSemanticNode { } + @Override + public SectionIdentifier getSectionIdentifier() { + + return getHeadline().getSectionIdentifier(); + } + + @Override public TextBlock getTextBlock() { @@ -75,19 +81,22 @@ public class Section implements GenericSemanticNode { } - public boolean anyHeadlineContainsString(String value){ + public boolean anyHeadlineContainsString(String value) { + return streamChildrenOfType(NodeType.HEADLINE)// .map(node -> (Headline) node).anyMatch(h -> h.containsString(value)); } - public boolean anyHeadlineContainsStringIgnoreCase(String value){ + public boolean anyHeadlineContainsStringIgnoreCase(String value) { + return 
streamChildrenOfType(NodeType.HEADLINE)// .map(node -> (Headline) node).anyMatch(h -> h.containsStringIgnoreCase(value)); } - public boolean hasParagraphs(){ + public boolean hasParagraphs() { + return streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SectionIdentifier.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SectionIdentifier.java new file mode 100644 index 00000000..b1455c2f --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SectionIdentifier.java @@ -0,0 +1,123 @@ +package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes; + +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.experimental.FieldDefaults; + +@AllArgsConstructor +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public class SectionIdentifier { + + static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?"); + + private enum Format { + EMPTY, + NUMERICAL, + DOCUMENT + } + + Format format; + String identifierString; + List identifiers; + boolean asChild; + + + public static SectionIdentifier fromSearchText(String headline) { + + if (headline == null || headline.isEmpty() || headline.isBlank()) { + return SectionIdentifier.empty(); + } + + Matcher numericalIdentifierMatcher = numericalIdentifierPattern.matcher(headline); + if (numericalIdentifierMatcher.find()) { + return buildNumericalSectionIdentifier(headline, 
numericalIdentifierMatcher); + } + // more formats here + return SectionIdentifier.empty(); + } + + + public static SectionIdentifier asChildOf(SectionIdentifier sectionIdentifier) { + + return new SectionIdentifier(sectionIdentifier.format, sectionIdentifier.toString(), sectionIdentifier.identifiers, true); + } + + + public static SectionIdentifier document() { + + return new SectionIdentifier(Format.DOCUMENT, "document", Collections.emptyList(), false); + } + + + public static SectionIdentifier empty() { + + return new SectionIdentifier(Format.EMPTY, "empty", Collections.emptyList(), false); + } + + + private static SectionIdentifier buildNumericalSectionIdentifier(String headline, Matcher numericalIdentifierMatcher) { + + String identifierString = headline.substring(numericalIdentifierMatcher.start(), numericalIdentifierMatcher.end()); + List identifiers = new LinkedList<>(); + for (int i = 1; i <= 4; i++) { + String numericalIdentifier = numericalIdentifierMatcher.group(i); + if (numericalIdentifier == null || numericalIdentifier.equals("0") || numericalIdentifier.isEmpty() || numericalIdentifier.isBlank()) { + break; + } + identifiers.add(Integer.parseInt(numericalIdentifier.trim())); + } + return new SectionIdentifier(Format.NUMERICAL, identifierString, identifiers.stream().toList(), false); + } + + + /** + * Determines if the current section is the parent of the given section. + * + * @param sectionIdentifier The section identifier to compare against. + * @return true if the current section is the parent of the given section, false otherwise. 
+ */ + public boolean isParentOf(SectionIdentifier sectionIdentifier) { + + if (this.format.equals(Format.EMPTY)) { + return false; + } + if (this.format.equals(Format.DOCUMENT)) { + return true; + } + if (!this.format.equals(sectionIdentifier.format)) { + return false; + } + if (this.identifiers.size() >= sectionIdentifier.identifiers.size() && !(this.identifiers.size() == sectionIdentifier.identifiers.size() && sectionIdentifier.asChild)) { + return false; + } + for (int i = 0; i < this.identifiers.size(); i++) { + if (!this.identifiers.get(i).equals(sectionIdentifier.identifiers.get(i))) { + return false; + } + } + return true; + } + + + public boolean isChildOf(SectionIdentifier sectionIdentifier) { + + if (this.format.equals(Format.DOCUMENT) || this.format.equals(Format.EMPTY)) { + return false; + } + return sectionIdentifier.isParentOf(this); + } + + + @Override + public String toString() { + + return identifierString; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java index 3e18869b..22eff118 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java @@ -115,6 +115,17 @@ public interface SemanticNode { } + /** + * Returns a SectionIdentifier, such that it acts as a child of the first Headline associated with this SemanticNode. + * + * @return The SectionIdentifier from the first Headline. 
+ */ + default SectionIdentifier getSectionIdentifier() { + + return SectionIdentifier.asChildOf(getHeadline().getSectionIdentifier()); + } + + /** * Checks if its TreeId has a length greater than zero. * diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlock.java index 7a7376ec..b5ebdead 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlock.java @@ -9,6 +9,8 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicPositionBlockData; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicTextBlockData; @@ -200,6 +202,38 @@ public class AtomicTextBlock implements TextBlock { } + @Override + public String subSequenceWithLineBreaks(Boundary boundary) { + + if (boundary.length() == 0 || !getBoundary().contains(boundary)) { + return ""; + } + + CharSequence subSequence = subSequence(boundary); + Set lbInBoundary = lineBreaks.stream().filter(boundary::contains).collect(Collectors.toSet()); + if (boundary.end() == getBoundary().end()) { + lbInBoundary.add(getBoundary().length()); + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < subSequence.length(); i++) { + char character = subSequence.charAt(i); + if (lbInBoundary.contains(i + 1)) { + // always plus one, due to the 
linebreaks being an exclusive end index + if (!Character.isWhitespace(character)) { + lbInBoundary.remove(i + 1); + lbInBoundary.add(i + 2); + sb.append(character); + continue; + } + sb.append("\n"); + } else { + sb.append(character); + } + } + return sb.toString(); + } + + private List getAllLineBreaksInBoundary(Boundary boundary) { return getLineBreaks().stream().map(linebreak -> linebreak + this.boundary.start()).filter(boundary::contains).toList(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/ConcatenatedTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/ConcatenatedTextBlock.java index 6fda9ad8..c560814e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/ConcatenatedTextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/ConcatenatedTextBlock.java @@ -172,6 +172,34 @@ public class ConcatenatedTextBlock implements TextBlock { } + @Override + public String subSequenceWithLineBreaks(Boundary boundary) { + + if (boundary.length() == 0 || !getBoundary().contains(boundary)) { + return ""; + } + + List textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(boundary); + + if (textBlocks.size() == 1) { + return textBlocks.get(0).subSequenceWithLineBreaks(boundary); + } + + StringBuilder sb = new StringBuilder(); + AtomicTextBlock firstTextBlock = textBlocks.get(0); + sb.append(firstTextBlock.subSequenceWithLineBreaks(new Boundary(boundary.start(), firstTextBlock.getBoundary().end()))); + + for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) { + sb.append(textBlock.searchTextWithLineBreaks()); + } + + var 
lastTextBlock = textBlocks.get(textBlocks.size() - 1); + sb.append(lastTextBlock.subSequenceWithLineBreaks(new Boundary(lastTextBlock.getBoundary().start(), boundary.end()))); + + return sb.toString(); + } + + private Map> mergeEntityPositionsWithSamePageNode(Map> map1, Map> map2) { Map> mergedMap = new HashMap<>(map1); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/TextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/TextBlock.java index bc8cc330..f757ae81 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/TextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/TextBlock.java @@ -42,9 +42,18 @@ public interface TextBlock extends CharSequence { Map> getPositionsPerPage(Boundary stringBoundary); + String subSequenceWithLineBreaks(Boundary boundary); + + int numberOfLines(); + default String searchTextWithLineBreaks() { + + return subSequenceWithLineBreaks(getBoundary()); + } + + default int indexOf(String searchTerm) { return indexOf(searchTerm, getBoundary().start()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java index 021e9de1..117bb07d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java @@ -23,6 +23,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionPosition; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.NodeType; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell; @@ -90,7 +91,9 @@ public class EntityCreationService { return entityBoundaries.stream() .map(boundary -> boundary.trim(node.getTextBlock())) .filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary)) - .map(boundary -> byBoundary(boundary, type, entityType, node)); + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } @@ -129,7 +132,9 @@ public class EntityCreationService { return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary()) .stream() .filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary)) - .map(bounds -> byBoundary(bounds, type, entityType, node)); + .map(bounds -> byBoundary(bounds, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } @@ -141,7 +146,9 @@ public class EntityCreationService { .stream() .map(boundary -> toLineAfterBoundary(textBlock, boundary)) .filter(boundary -> isValidEntityBoundary(textBlock, boundary)) - .map(boundary -> byBoundary(boundary, type, entityType, 
node)); + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } @@ -152,19 +159,24 @@ public class EntityCreationService { .stream() .map(boundary -> toLineAfterBoundary(textBlock, boundary)) .filter(boundary -> isValidEntityBoundary(textBlock, boundary)) - .map(boundary -> byBoundary(boundary, type, entityType, node)); + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } - public Stream byRegexWithLinebreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) { - return byRegexWithLinebreaks(regexPattern, type, entityType, 0, node); + public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) { + + return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node); } - public Stream byRegexWithLinebreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { - return byRegexWithLinebreaksIgnoreCase(regexPattern, type, entityType, 0, node); + public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { + + return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node); } + public Stream byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegex(regexPattern, type, entityType, 0, node); @@ -177,20 +189,33 @@ public class EntityCreationService { } - public Stream byRegexWithLinebreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { + public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { - return RedactionSearchUtility.findBoundariesByRegexWithLinebreaks(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node)); + return 
RedactionSearchUtility.findBoundariesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock()) + .stream() + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } - public Stream byRegexWithLinebreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { + public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { - return RedactionSearchUtility.findBoundariesByRegexWithLinebreaksIgnoreCase(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node)); + return RedactionSearchUtility.findBoundariesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock()) + .stream() + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } + public Stream byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { - return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node)); + return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock()) + .stream() + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } @@ -198,13 +223,25 @@ public class EntityCreationService { return RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexPattern, group, node.getTextBlock()) .stream() - .map(boundary -> byBoundary(boundary, type, entityType, node)); + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); } public Stream byString(String keyword, String type, EntityType entityType, SemanticNode node) { - return RedactionSearchUtility.findBoundariesByString(keyword, 
node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node)); + return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock()) + .stream() + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); + } + + + public Stream bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) { + + return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get); } @@ -218,18 +255,18 @@ public class EntityCreationService { if (!isValidEntityBoundary(node.getTextBlock(), boundary)) { return Optional.empty(); } - return Optional.of(byBoundary(boundary, type, entityType, node)); + return byBoundary(boundary, type, entityType, node); } - public RedactionEntity byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) { + public Optional byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) { int expandedStart = getExpandedStartByRegex(entity, regexPattern); return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode()); } - public RedactionEntity bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) { + public Optional bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) { int expandedEnd = getExpandedEndByRegex(entity, regexPattern); expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock()); @@ -246,7 +283,32 @@ public class EntityCreationService { } - public RedactionEntity byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) { + /** + * Creates a redaction entity based on the given boundary, type, entity type, and semantic node. 
+ * If the document already contains an equal redaction entity, then an empty Optional is returned. + * + * @param boundary The boundary of the redaction entity. + * @param type The type of the redaction entity. + * @param entityType The entity type of the redaction entity. + * @param node The semantic node to associate with the redaction entity. + * @return An Optional containing the redaction entity, or an empty Optional if the entity already exists. + */ + public Optional byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) { + + if (!node.getBoundary().contains(boundary)) { + throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", boundary, node.getBoundary(), node)); + } + Boundary trimmedBoundary = boundary.trim(node.getTextBlock()); + RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType); + if (node.getEntities().contains(entity)) { + return Optional.empty(); + } + addEntityToGraph(entity, node); + return Optional.of(entity); + } + + + public RedactionEntity forceByBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) { Boundary trimmedBoundary = boundary.trim(node.getTextBlock()); RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType); @@ -281,19 +343,15 @@ public class EntityCreationService { } - public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) { + public Optional byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) { - RedactionEntity entity = byBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode); - entity.addEngine(Engine.NER); - return entity; + return byBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode).stream().peek(entity -> entity.addEngine(Engine.NER)).findAny(); } - public RedactionEntity
byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) { + public Optional byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) { - RedactionEntity entity = byBoundary(nerEntity.boundary(), type, entityType, semanticNode); - entity.addEngine(Engine.NER); - return entity; + return byBoundary(nerEntity.boundary(), type, entityType, semanticNode).stream().peek(entity -> entity.addEngine(Engine.NER)).findAny(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformations.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformations.java index fdc3639f..4c101e57 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformations.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformations.java @@ -116,7 +116,7 @@ public class RectangleTransformations { @Override public BiConsumer accumulator() { - return (bb, rect) -> bb.addRectangle(rect.getMinX(), rect.getMinY(), rect.getMaxX(), rect.getMaxY()); + return BBox::addRectangle; } @@ -154,7 +154,12 @@ public class RectangleTransformations { Double upperRightY; - public void addRectangle(double lowerLeftX, double lowerLeftY, double upperRightX, double upperRightY) { + public void addRectangle(Rectangle2D rectangle2D) { + + double lowerLeftX = Math.min(rectangle2D.getMinX(), rectangle2D.getMaxX()); + double lowerLeftY = Math.min(rectangle2D.getMinY(), rectangle2D.getMaxY()); + double upperRightX = Math.max(rectangle2D.getMinX(), rectangle2D.getMaxX()); + double upperRightY = 
Math.max(rectangle2D.getMinY(), rectangle2D.getMaxY()); if (this.lowerLeftX == null) { this.lowerLeftX = lowerLeftX; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RedactionSearchUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RedactionSearchUtility.java index 81bc9907..1ca46870 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RedactionSearchUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RedactionSearchUtility.java @@ -102,17 +102,17 @@ public class RedactionSearchUtility { } - public static List findBoundariesByRegexWithLinebreaks(String regexPattern, int group, TextBlock textBlock) { + public static List findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false); - return getBoundariesByPatternWithLinebreaks(textBlock, group, pattern); + return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern); } - public static List findBoundariesByRegexWithLinebreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) { + public static List findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true); - return getBoundariesByPatternWithLinebreaks(textBlock, group, pattern); + return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern); } @@ -134,21 +134,10 @@ public class RedactionSearchUtility { } - private static List getBoundariesByPatternWithLinebreaks(TextBlock textBlock, int group, Pattern pattern) { + private static 
List getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) { - StringBuilder stringBuilder = new StringBuilder(); - textBlock.getAtomicTextBlocks().forEach(at -> { - if (at.numberOfLines() > 1) { - for (int i = 0; i < at.numberOfLines(); i++) { - stringBuilder.append(at.getLine(i)); - stringBuilder.setCharAt(stringBuilder.length() - 1, '\n'); - } - } else { - stringBuilder.append(at.getSearchText()).setCharAt(stringBuilder.length() - 1, '\n'); - } - }); - - Matcher matcher = pattern.matcher(stringBuilder.toString()); + String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks(); + Matcher matcher = pattern.matcher(searchTextWithLineBreaks); List boundaries = new LinkedList<>(); while (matcher.find()) { boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start())); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index 4cc67431..a8b74763 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -84,6 +84,7 @@ public class AnalyzeService { @Timed("redactmanager_analyzeDocumentStructure") public AnalyzeResult analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) { + log.info("Starting Structure Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); long startTime = System.currentTimeMillis(); ClassificationDocument classifiedDoc; @@ -92,25 +93,29 @@ public class AnalyzeService { var storedObjectStream = 
redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN)); - + log.info("Loaded PDF for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); Map> pdfImages = null; if (redactionServiceSettings.isEnableImageClassification()) { pdfImages = imageServiceResponseAdapter.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + log.info("Loaded image service response for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); } classifiedDoc = pdfSegmentationService.parseDocument(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), storedObjectStream, pdfImages); + log.info("Parsed document for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); } catch (Exception e) { throw new RedactionException(e); } Document document = DocumentGraphFactory.buildDocumentGraph(classifiedDoc); + log.info("Built Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); SectionGrid sectionGrid = sectionGridCreatorService.createSectionGrid(document); + log.info("Built section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - log.info("Store document graph, text, simplified text, and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, DocumentData.fromDocument(document)); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SIMPLIFIED_TEXT, toSimplifiedText(document)); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, sectionGrid); + log.info("Stored document graph, text, simplified text, and section grid 
for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); return AnalyzeResult.builder() .dossierId(analyzeRequest.getDossierId()) @@ -125,21 +130,27 @@ public class AnalyzeService { @Timed("redactmanager_analyze") public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) { + log.info("Starting Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); long startTime = System.currentTimeMillis(); Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); + log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document); + log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + log.info("Updated Dictionary for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId()); long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId()); - Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + log.info("Updated Rules to Version {} for file {} in dossier {}", rulesVersion, analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - log.debug("Starting Dictionary Search"); - long dictSearchStart = System.currentTimeMillis(); entityRedactionService.addDictionaryEntities(dictionary, document); - log.debug("Finished Dictionary Search 
in {} ms", System.currentTimeMillis() - dictSearchStart); + log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + Set addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, kieContainer, analyzeRequest, nerEntities); + log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); List redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId()); @@ -168,10 +179,12 @@ public class AnalyzeService { @SneakyThrows public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) { + log.info("Starting Reanalysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); long startTime = System.currentTimeMillis(); RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + log.info("Loaded previous redaction log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); - + log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); // not yet ready for reanalysis if (previousRedactionLog == null || document == null || document.getNumberOfPages() == 0) { return analyze(analyzeRequest); @@ -183,6 +196,7 @@ public class AnalyzeService { Set sectionsToReanalyseIds = getSectionsToReanalyseIds(analyzeRequest, previousRedactionLog, document, dictionaryIncrement); List sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds); + log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); 
if (sectionsToReAnalyse.isEmpty()) { return finalizeAnalysis(analyzeRequest, @@ -195,15 +209,16 @@ public class AnalyzeService { } NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds); - log.info("Reanalyze {} sections with {} Ner Entities", sectionsToReAnalyse.size(), nerEntities.getNerEntityList().size()); + log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId()); + log.info("Updated Rules for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); sectionsToReAnalyse.forEach(node -> entityRedactionService.addDictionaryEntities(dictionary, node)); + log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - long ruleStart = System.currentTimeMillis(); Set addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, sectionsToReAnalyse, kieContainer, analyzeRequest, nerEntities); - log.info("Rule execution took {} ms", System.currentTimeMillis() - ruleStart); + log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); List newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId()); @@ -244,7 +259,10 @@ public class AnalyzeService { analyzeRequest.getFileId(), redactionLog, analyzeRequest.getAnalysisNumber()); + log.info("Created Redaction Log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, 
redactionLogChange.getRedactionLog()); + log.info("Stored Redaction Log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); long duration = System.currentTimeMillis() - startTime; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index 3e27a9b7..e2856fe5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -252,11 +252,11 @@ public class DictionaryService { falsePositives.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT))); falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT))); } - log.info("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}", + log.debug("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}", entries.size(), falsePositives.size(), falseRecommendations.size(), - type.getType()); + typeId); return new DictionaryEntries(entries, falsePositives, falseRecommendations); } @@ -304,7 +304,8 @@ public class DictionaryService { if (dossierDictionaryExists(dossierId)) { var dossierRepresentation = getDossierDictionary(dossierId); var dossierDictionaries = dossierRepresentation.getDictionary(); - mergedDictionaries = convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries), convertDictionaryModel(dossierDictionaries))); + mergedDictionaries = 
convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries), + convertDictionaryModel(dossierDictionaries))); dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion(); } else { mergedDictionaries = new ArrayList<>(); @@ -367,23 +368,37 @@ public class DictionaryService { } } + private List convertDictionaryModel(List dictionaries) { - return dictionaries.stream().map(d -> CommonsDictionaryModel.builder() - .type(d.getType()) - .rank(d.getRank()) - .color(d.getColor()) - .caseInsensitive(d.isCaseInsensitive()) - .hint(d.isHint()) - .isDossierDictionary(d.isDossierDictionary()) - .entries(d.getEntries()) - .falsePositives(d.getFalsePositives()) - .falseRecommendations(d.getFalseRecommendations()) - .build()).collect(Collectors.toList()); + + return dictionaries.stream() + .map(d -> CommonsDictionaryModel.builder() + .type(d.getType()) + .rank(d.getRank()) + .color(d.getColor()) + .caseInsensitive(d.isCaseInsensitive()) + .hint(d.isHint()) + .isDossierDictionary(d.isDossierDictionary()) + .entries(d.getEntries()) + .falsePositives(d.getFalsePositives()) + .falseRecommendations(d.getFalseRecommendations()) + .build()) + .collect(Collectors.toList()); } + private List convertCommonsDictionaryModel(List commonsDictionaries) { - return commonsDictionaries.stream().map(cd -> - new DictionaryModel(cd.getType(), cd.getRank(), cd.getColor(), cd.isCaseInsensitive(), cd.isHint(), cd.getEntries(), cd.getFalsePositives(), cd.getFalseRecommendations(), cd.isDossierDictionary())) + + return commonsDictionaries.stream() + .map(cd -> new DictionaryModel(cd.getType(), + cd.getRank(), + cd.getColor(), + cd.isCaseInsensitive(), + cd.isHint(), + cd.getEntries(), + cd.getFalsePositives(), + cd.getFalseRecommendations(), + cd.isDossierDictionary())) .collect(Collectors.toList()); } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index 0b4b2f2a..b6fafe06 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -23,7 +23,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribu import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; -import com.knecon.fforesight.tenantcommons.TenantContext; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; @@ -91,6 +90,7 @@ public class DroolsExecutionService { kieSession.setGlobal("dictionary", dictionary); kieSession.setGlobal("nerEntitiesAdapter", nerEntitiesAdapter); + kieSession.insert(document); document.getEntities().forEach(kieSession::insert); sectionsToAnalyze.forEach(kieSession::insert); sectionsToAnalyze.stream().flatMap(SemanticNode::streamAllSubNodes).forEach(kieSession::insert); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java index 6b64b38a..c914b140 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java @@ -75,7 +75,7 @@ public class ManualRedactionSurroundingTextService { Set entities = RedactionSearchUtility.findBoundariesByString(value, node.getTextBlock()) .stream() - .map(boundary -> entityCreationService.byBoundary(boundary, "searchHelper", EntityType.RECOMMENDATION, node)) + .map(boundary -> entityCreationService.forceByBoundary(boundary, "searchHelper", EntityType.RECOMMENDATION, node)) .collect(Collectors.toSet()); RedactionEntity correctEntity = getEntityOnCorrectPosition(entities, toFindPositions); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 961e3bc9..5d9d5677 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -35,8 +35,9 @@ public class RedactionLogCreatorService { document.getEntities() .stream() .filter(RedactionLogCreatorService::isEntityOrRecommendationType) + .filter(entity -> !entity.isRemoved()) .forEach(entityNode -> 
entries.addAll(toRedactionLogEntries(entityNode, processedIds, dossierTemplateId))); - document.streamAllImages().forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId))); + document.streamAllImages().filter(image -> !image.isRemoved()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId))); return entries; } @@ -80,17 +81,14 @@ public class RedactionLogCreatorService { private RedactionLogEntry createRedactionLogEntry(RedactionEntity entity, String dossierTemplateId) { Set referenceIds = new HashSet<>(); - entity.getReferences() - .stream() - .filter(redactionEntity -> !redactionEntity.isRemoved() && !redactionEntity.isIgnored()) - .forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId()))); + entity.getReferences().stream().filter(RedactionEntity::isActive).forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId()))); int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 0 : entity.getDeepestFullyContainingNode().getTreeId().get(0); return RedactionLogEntry.builder() .color(getColor(entity.getType(), dossierTemplateId, entity.isApplied())) - .reason(entity.getMatchedRule().reason()) - .legalBasis(entity.getMatchedRule().legalBasis()) - .value(entity.getValue()) + .reason(entity.getMatchedRule().getReason()) + .legalBasis(entity.getMatchedRule().getLegalBasis()) + .value(entity.getMatchedRule().isWriteValueWithLineBreaks() ? 
entity.getValueWithLineBreaks() : entity.getValue()) .type(entity.getType()) .redacted(entity.isApplied()) .isHint(isHint(entity.getType(), dossierTemplateId)) @@ -98,7 +96,7 @@ public class RedactionLogCreatorService { .isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) .section(entity.getDeepestFullyContainingNode().toString()) .sectionNumber(sectionNumber) - .matchedRule(entity.getMatchedRule().ruleIdentifier().toString()) + .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString()) .isDictionaryEntry(entity.isDictionaryEntry()) .textAfter(entity.getTextAfter()) .textBefore(entity.getTextBefore()) @@ -120,9 +118,9 @@ public class RedactionLogCreatorService { .isImage(true) .type(imageType) .redacted(image.isApplied()) - .reason(image.getMatchedRule().reason()) - .legalBasis(image.getMatchedRule().legalBasis()) - .matchedRule(image.getMatchedRule().ruleIdentifier().toString()) + .reason(image.getMatchedRule().getReason()) + .legalBasis(image.getMatchedRule().getLegalBasis()) + .matchedRule(image.getMatchedRule().getRuleIdentifier().toString()) .isHint(dictionaryService.isHint(image.getImageType().toString(), dossierTemplateId)) .isDictionaryEntry(false) .isRecommendation(false) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SectionFinderService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SectionFinderService.java index 12347b42..49b2cdeb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SectionFinderService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SectionFinderService.java @@ -55,7 +55,7 @@ class SectionFinderService { 
} }); - log.info("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start); + log.debug("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start); return sectionsToReanalyse; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java index b1828e88..74ab05bf 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java @@ -9,9 +9,9 @@ import lombok.experimental.UtilityClass; @UtilityClass public final class Patterns { - public static Map patternCache = new HashMap<>(); + public static final Map patternCache = new HashMap<>(); - public static Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile( + public static final Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile( "(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.){1,3})|(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} ){1,3})"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java new file mode 100644 index 00000000..260960b5 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -0,0 +1,163 @@ +package com.iqser.red.service.redaction.v1.server; + +import static org.mockito.Mockito.when; +import static org.wildfly.common.Assert.assertTrue; + 
+import java.io.FileOutputStream; +import java.io.IOException; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Set; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.FilterType; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; +import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; +import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; +import com.iqser.red.storage.commons.StorageAutoConfiguration; +import 
com.iqser.red.storage.commons.service.StorageService; +import com.knecon.fforesight.tenantcommons.TenantContext; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) +@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class) +public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { + + private static final String RULES = loadFromClassPath("drools/acceptance_rules.drl"); + + @Configuration + @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class}) + @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)}) + public static class RedactionIntegrationTestConfiguration { + + @Bean + @Primary + public StorageService inmemoryStorage() { + + return new FileSystemBackedStorageService(); + } + + } + + + @BeforeEach + public void stubClients() { + + TenantContext.setTenantId("redaction"); + + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES)); + + loadDictionaryForTest(); + loadTypeForTest(); + loadNerForTest(); + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse()); + + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + 
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); + + mockDictionaryCalls(null); + + when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); + } + + + @Test + public void acceptanceTests() throws IOException { + + AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"); + System.out.println("Start Full integration test"); + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + System.out.println("Finished structure analysis"); + AnalyzeResult result = analyzeService.analyze(request); + System.out.println("Finished analysis"); + var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + var publishedInformationEntry1 = redactionLog.getRedactionLogEntry() + .stream() + .filter(entry -> entry.getType().equals("published_information")) + .filter(entry -> entry.getValue().equals("Oxford University Press")) + .findFirst() + .orElseThrow(); + + var asyaLyon1 = redactionLog.getRedactionLogEntry() + .stream() + .filter(entry -> entry.getType().equals("CBI_author")) + .filter(entry -> entry.getValue().equals("Asya Lyon")) + .filter(entry -> entry.getSectionNumber() == publishedInformationEntry1.getSectionNumber()) + .findFirst() + .orElseThrow(); + + // assertFalse(asyaLyon1.isRedacted()); + + var idRemoval = IdRemoval.builder() + .requestDate(OffsetDateTime.now()) + .annotationId(publishedInformationEntry1.getId()) + .status(AnnotationStatus.APPROVED) + .fileId(TEST_FILE_ID) + .build(); + + var manualRedactions = ManualRedactions.builder().idsToRemove(Set.of(idRemoval)).build(); + request.setManualRedactions(manualRedactions); + analyzeService.reanalyze(request); + + redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + var publishedInformationEntry2 = 
redactionLog.getRedactionLogEntry() + .stream() + .filter(entry -> entry.getType().equals("published_information")) + .filter(entry -> entry.getValue().equals("Oxford University Press")) + .findFirst() + .orElseThrow(); + + var asyaLyon2 = redactionLog.getRedactionLogEntry() + .stream() + .filter(entry -> entry.getType().equals("CBI_author")) + .filter(entry -> entry.getValue().equals("Asya Lyon")) + .filter(entry -> entry.getSectionNumber() == publishedInformationEntry2.getSectionNumber()) + .findFirst() + .orElseThrow(); + + assertTrue(asyaLyon2.isRedacted()); + + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); + + String outputFileName = OsUtils.getTemporaryDirectory() + "/AcceptanceTest.pdf"; + + try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) { + fileOutputStream.write(annotateResponse.getDocument()); + } + + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java index 24be60ad..2c261855 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java @@ -20,7 +20,7 @@ public class RedactionEntityTest { entity.skip("CBI.3.0", ""); entity.skip("CBI.4.1", ""); entity.skip("CBI.4.0", ""); - assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.4.1"); + assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.4.1"); assertThat(entity.getMatchedRuleUnit()).isEqualTo(4); } @@ -34,7 +34,7 @@ 
public class RedactionEntityTest { entity.skip("CBI.3.0", ""); entity.skip("CBI.4.1", ""); entity.skip("CBI.4.0", ""); - assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("MAN.2.0"); + assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("MAN.2.0"); assertThat(entity.getMatchedRuleUnit()).isEqualTo(2); } @@ -59,7 +59,7 @@ public class RedactionEntityTest { entity.apply("CBI.0.0", "", ""); }); entity.skip("CBI.2.0", ""); - assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.2.0"); + assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.2.0"); assertThat(entity.getMatchedRuleUnit()).isEqualTo(2); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java index 5599e91c..82be8fa9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java @@ -44,6 +44,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra } + @Test + public void assertSameEntitiesCantBeCreatedTwice() { + + Document document = buildGraph("files/new/crafted document.pdf"); + String type = "CBI_author"; + assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent()); + assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isEmpty()); + assertEquals(1, document.getEntities().size()); + } + + private RedactionEntity 
createAndInsertEntity(Document document, String searchTerm) { int start = document.getTextBlock().indexOf(searchTerm); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java index 443cf69a..1d0b04d9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java @@ -138,7 +138,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals("Something", entity.getMatchedRule().legalBasis()); + assertEquals("Something", entity.getMatchedRule().getLegalBasis()); assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); assertFalse(entity.isRemoved()); assertTrue(entity.isSkipRemoveEntitiesContainedInLarger()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/sectionidentifiers/SectionIdentifierTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/sectionidentifiers/SectionIdentifierTest.java new file mode 100644 index 00000000..36d89b62 --- /dev/null +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/sectionidentifiers/SectionIdentifierTest.java
@@ -0,0 +1,58 @@
+package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.sectionidentifiers;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SectionIdentifier;
+
+class SectionIdentifierTest { // Checks parent/child relations between SectionIdentifiers built from headline text.
+
+    @Test
+    public void testParentOf() {
+
+        var headline = SectionIdentifier.fromSearchText("1 Did you ever hear the tragedy of Darth Plagueis The Wise?");
+        var headline1 = SectionIdentifier.fromSearchText("1.0 I thought not. It’s not a story the Jedi would tell you.");
+        var headline2 = SectionIdentifier.fromSearchText("1.1 It’s a Sith legend. Darth Plagueis was a Dark Lord of the Sith, ");
+        var headline3 = SectionIdentifier.fromSearchText("1.2.3 so powerful and so wise he could use the Force to influence the midichlorians to create life…");
+        var headline4 = SectionIdentifier.fromSearchText("1.2.3.4 He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.");
+        var headline5 = SectionIdentifier.fromSearchText("1.2.3.4.5 The dark side of the Force is a pathway to many abilities some consider to be unnatural.");
+        var headline6 = SectionIdentifier.fromSearchText("2.0 He became so powerful…");
+        var headline7 = SectionIdentifier.fromSearchText("10000.0 the only thing he was afraid of was losing his power,");
+        var headline8 = SectionIdentifier.fromSearchText("A.0 which eventually, of course, he did."); // numbering starts with a letter
+        var headline9 = SectionIdentifier.fromSearchText("Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep."); // no leading numbering at all
+        var headline10 = SectionIdentifier.fromSearchText("2.1.2 Ironic.");
+        var headline11 = SectionIdentifier.fromSearchText("2.He could save others from death,"); // numbering directly followed by text, no separating space
+        var headline12 = SectionIdentifier.fromSearchText(" 2. but not himself."); // numbering preceded by whitespace
+
+        var paragraph1 = SectionIdentifier.asChildOf(headline); // created as a child of "1" instead of being parsed from text
+        assertTrue(paragraph1.isChildOf(headline));
+        assertTrue(headline.isParentOf(paragraph1));
+        assertFalse(paragraph1.isParentOf(headline)); // the relation is not symmetric
+
+        assertFalse(headline.isParentOf(headline1)); // "1" is not reported as parent of "1.0"
+        assertTrue(headline.isParentOf(headline2));
+        assertTrue(headline.isParentOf(headline3));
+        assertTrue(headline.isParentOf(headline4));
+        assertTrue(headline.isParentOf(headline5));
+        assertTrue(headline1.isParentOf(headline2)); // whereas "1.0" is reported as parent of "1.1"
+        assertFalse(headline1.isParentOf(headline1)); // an identifier is not its own parent
+        assertTrue(headline3.isParentOf(headline4));
+        assertFalse(headline4.isParentOf(headline5)); // NOTE(review): "1.2.3.4" vs "1.2.3.4.5" is expected false; presumably the parsed depth is limited, confirm in SectionIdentifier
+        assertFalse(headline2.isParentOf(headline3));
+        assertFalse(headline2.isParentOf(headline4));
+        assertTrue(headline1.isParentOf(headline3));
+        assertTrue(headline1.isParentOf(headline4));
+        assertFalse(headline1.isParentOf(headline6));
+        assertFalse(headline1.isParentOf(headline7));
+        assertFalse(headline8.isParentOf(headline1));
+        assertFalse(headline8.isParentOf(headline2));
+        assertFalse(headline8.isParentOf(headline3));
+        assertFalse(headline8.isParentOf(headline4));
+        assertFalse(headline9.isParentOf(headline9)); // same self-check for the headline without numbering
+        assertTrue(headline10.isChildOf(headline11)); // "2.1.2" is a child of "2." written without a following space
+        assertTrue(headline10.isChildOf(headline12)); // and of "2." written with leading whitespace
+    }
+
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlockTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlockTest.java
new file mode 100644
index 00000000..76bb80e1
--- /dev/null
+++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/textblock/AtomicTextBlockTest.java
@@ -0,0 +1,93 @@
+package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
+
+class AtomicTextBlockTest { // Verifies searchTextWithLineBreaks() on single AtomicTextBlocks and on ConcatenatedTextBlocks.
+
+    @Test
+    void subSequenceWithLineBreaks1() {
+
+        String searchText = "1234 6789 ";
+        var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText.length())).build();
+        String searchTextWithLineBreaks = atb.searchTextWithLineBreaks();
+        assertEquals("1234\n6789\n", searchTextWithLineBreaks);
+    }
+
+
+    @Test
+    void subSequenceWithLineBreaks2() {
+
+        String searchText = "1234 6789 ";
+        var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7, 8, 9)).boundary(new Boundary(0, searchText.length())).build(); // extra line-break entries (8, 9); expected text is the same as in test 1
+        String searchTextWithLineBreaks = atb.searchTextWithLineBreaks();
+        assertEquals("1234\n6789\n", searchTextWithLineBreaks);
+    }
+
+
+    @Test
+    void subSequenceWithLineBreaks3() {
+
+        String searchText = "1234 6789 1234 ";
+        var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText.length())).build();
+        String searchTextWithLineBreaks = atb.searchTextWithLineBreaks();
+        assertEquals("1234\n6789\n1234\n", searchTextWithLineBreaks);
+    }
+
+
+    @Test
+    void subSequenceWithLineBreaks4() {
+
+        String searchText = "1234 6789 1234 ";
+        var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText.length())).build();
+        var textBlock = new ConcatenatedTextBlock(List.of(atb)); // same block as in test 3, wrapped in a ConcatenatedTextBlock
+        String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
+        assertEquals("1234\n6789\n1234\n", searchTextWithLineBreaks);
+    }
+
+
+    @Test
+    void subSequenceWithLineBreaks5() {
+
+        String searchText1 = "1234 6789 ";
+        String searchText2 = "1234 ";
+        var atb1 = AtomicTextBlock.builder().searchText(searchText1).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText1.length())).build();
+        var atb2 = AtomicTextBlock.builder()
+                .searchText(searchText2)
+                .lineBreaks(List.of()) // second block declares no line breaks of its own
+                .boundary(new Boundary(searchText1.length(), searchText1.length() + searchText2.length())) // starts where the first block's boundary ends
+                .build();
+        var textBlock = new ConcatenatedTextBlock(List.of(atb1, atb2));
+        String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
+        assertEquals("1234\n6789\n1234\n", searchTextWithLineBreaks);
+    }
+
+
+    @Test
+    void subSequenceWithLineBreaks6() {
+
+        String searchText1 = "1234 6789 ";
+        String searchText2 = "1234 ";
+        String searchText3 = "1234 8475678900 ";
+        var atb1 = AtomicTextBlock.builder().searchText(searchText1).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText1.length())).build();
+        var atb2 = AtomicTextBlock.builder()
+                .searchText(searchText2)
+                .lineBreaks(List.of())
+                .boundary(new Boundary(searchText1.length(), searchText1.length() + searchText2.length()))
+                .build();
+        var atb3 = AtomicTextBlock.builder()
+                .searchText(searchText3)
+                .lineBreaks(List.of(atb2.getBoundary().end() + 6)) // offset is given as the end of the second block's boundary plus 6
+                .boundary(new Boundary(atb2.getBoundary().end(), atb2.getBoundary().end() + searchText3.length())) // third block continues directly after the second
+                .build();
+        var textBlock = new ConcatenatedTextBlock(List.of(atb1, atb2, atb3));
+        String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
+        assertEquals("1234\n6789\n1234\n1234 8475678900\n", searchTextWithLineBreaks);
+    }
+
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformationsTest.java
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformationsTest.java
new file mode 100644
index 00000000..6bc9603f
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/utils/RectangleTransformationsTest.java
@@ -0,0 +1,90 @@
+package com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.awt.geom.Rectangle2D;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+class RectangleTransformationsTest { // Verifies rectangle2DBBox() for rectangles with positive and negative width/height.
+
+    @Test
+    public void testRectangle2DBBox() {
+
+        var r1 = new Rectangle2D.Double(0, 0, 1, 1);
+        var r2 = new Rectangle2D.Double(1, 1, 1, 1);
+        var result = RectangleTransformations.rectangle2DBBox(List.of(r1, r2));
+        assertEquals(0, result.getX());
+        assertEquals(0, result.getY());
+        assertEquals(2, result.getWidth());
+        assertEquals(2, result.getHeight());
+    }
+
+
+    @Test
+    public void testRectangle2DBBox2() {
+
+        var r1 = new Rectangle2D.Double(0, 0, -1, -1); // negative width and height
+        var r2 = new Rectangle2D.Double(1, 1, 1, 1);
+        var result = RectangleTransformations.rectangle2DBBox(List.of(r1, r2));
+        assertEquals(-1, result.getX()); // expected box starts at (-1, -1), not at r1's x/y of (0, 0)
+        assertEquals(-1, result.getY());
+        assertEquals(3, result.getWidth());
+        assertEquals(3, result.getHeight());
+    }
+
+
+    @Test
+    public void testRectangle2DBBox3() {
+
+        var r1 = new Rectangle2D.Double(0, 0, -1, -1);
+        var r2 = new Rectangle2D.Double(1, 1, 1, 1);
+        var result = RectangleTransformations.rectangle2DBBox(List.of(r2, r1)); // same rectangles as testRectangle2DBBox2, passed in reverse order
+        assertEquals(-1, result.getX());
+        assertEquals(-1, result.getY());
+        assertEquals(3, result.getWidth());
+        assertEquals(3, result.getHeight());
+    }
+
+
+    @Test
+    public void testRectangle2DBBox4() {
+
+        var r1 = new Rectangle2D.Double(2, 0, -1, -1);
+        var r2 = new Rectangle2D.Double(0, 2, 1, -1); // positive width, negative height
+        var result = RectangleTransformations.rectangle2DBBox(List.of(r2, r1));
+        assertEquals(0, result.getX());
+        assertEquals(-1, result.getY());
+        assertEquals(2, result.getWidth());
+        assertEquals(3, result.getHeight());
+    }
+
+
+    @Test
+    public void testRectangle2DBBox5() {
+
+        var r1 = new Rectangle2D.Double(2, 0, -1, -1);
+        var r2 = new Rectangle2D.Double(0, 2, 1, -1);
+        var r3 = new Rectangle2D.Double(3, 2, 1, 1); // added to the two rectangles of testRectangle2DBBox4; expected box becomes 4 x 4
+        var result = RectangleTransformations.rectangle2DBBox(List.of(r2, r1, r3));
+        assertEquals(0, result.getX());
+        assertEquals(-1, result.getY());
+        assertEquals(4, result.getWidth());
+        assertEquals(4, result.getHeight());
+    }
+
+
+    @Test
+    public void testRectangle2DBBox6() {
+
+        var r1 = new Rectangle2D.Double(0, 0, -1, -1);
+        var r2 = new Rectangle2D.Double(-1, -1, -1, -1); // both rectangles have negative width and height
+        var result = RectangleTransformations.rectangle2DBBox(List.of(r1, r2));
+        assertEquals(-2, result.getX());
+        assertEquals(-2, result.getY());
+        assertEquals(2, result.getWidth());
+        assertEquals(2, result.getHeight());
+    }
+
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java
index 5ce8f7e5..d428ccc4 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java
@@ -8,6 +8,7 @@ import java.awt.geom.Rectangle2D; import java.io.File; import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -66,6 +67,8 @@ class NerEntitiesAdapterTest extends 
BuildDocumentIntegrationTest { .filter(e -> !e.type().equals("CBI_author")); List redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts) .map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document)) + .filter(Optional::isPresent) + .map(Optional::get) .toList(); redactionEntities.stream() .collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber())) @@ -98,6 +101,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest { log.info("Combined to CBI_address"); List cbiAddressEntities = nerEntityBoundaries.stream() .map(b -> entityCreationService.byBoundary(b, "CBI_address", EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) .toList(); assertFalse(cbiAddressEntities.isEmpty()); assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getBoundary().start() < entity.getBoundary().end())); @@ -108,6 +113,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest { .getNerEntityList() .stream() .map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document)) + .filter(Optional::isPresent) + .map(Optional::get) .toList(); Stream.concat(cbiAddressEntities.stream(), validatedEntities.stream()) .collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber())) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt index 9b21c07e..737e1a40 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt @@ -143,6 +143,7 @@ Allen T. Allen T.R. Almeida A Almeida A. +Asya Lyon Almeida A.A. Almeida A.A.|Vassilieff I. Almeida|A.A.|Vassilieff|I. 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/published_information.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/published_information.txt index 66b5a466..798747f9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/published_information.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/published_information.txt @@ -87,3 +87,4 @@ Toxicol Sci. Toxicol Sci. 1 Test Ignored Hint Published Information Workshop +Oxford University Press diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl new file mode 100644 index 00000000..87452a6d --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -0,0 +1,772 @@ +package drools + +import static java.lang.String.format; +import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch; +import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch; + +import java.util.List; +import java.util.LinkedList; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; +import java.util.Optional; + +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode; +import 
com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; +import 
com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility; + +global Document document +global EntityCreationService entityCreationService +global ManualRedactionApplicationService manualRedactionApplicationService +global NerEntitiesAdapter nerEntitiesAdapter +global Dictionary dictionary + +//------------------------------------ queries ------------------------------------ + +query "getFileAttributes" + $fileAttribute: FileAttribute() + end + +//------------------------------------ Syngenta specific rules ------------------------------------ + +// Rule unit: SYN.1 +rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" + when + $section: Section(containsString("CT") || containsString("BL")) + then + /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */ + entityCreationService.byRegexIgnoreCase("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? 
?\\d+)?)|(\\bCT[L1i]\\b))", "CBI_address", EntityType.RECOMMENDATION, $section) + .forEach(entity -> { + entity.skip("SYN.1.0", ""); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + + +//------------------------------------ CBI rules ------------------------------------ + +// Rule unit: CBI.0 +rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + then + $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + then + $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: CBI.1 +rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + then + $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); + end + +rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + then + $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: CBI.2 +rule "CBI.2.0: Don't redact genitive CBI_author" + when + $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied()) + then + entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document) + .ifPresent(falsePositive -> { + falsePositive.skip("CBI.2.0", "Genitive Author found"); + insert(falsePositive); + 
}); + end + + +// Rule unit: CBI.7 +rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.0", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" + when + $table: Table(hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.1", + "Published Information found in row", + $table.getEntitiesOfTypeInSameRow("published_information", redactionEntity) + ); + }); + end + + +// Rule unit: CBI.9 +rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + +rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not 
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + + +// Rule unit: CBI.10 +rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + +rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + + +// Rule unit: CBI.11 +rule "CBI.11.0: Recommend all CBI_author entities in 
Table with Vertebrate Study Y/N Header" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -1 + when + $table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N")) + then + $table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); + end + + +// Rule unit: CBI.16 +rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); + dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + insert(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
+            .forEach(entity -> {
+                entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+                entity.addEngine(Engine.RULE);
+                insert(entity);
+                dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
+            });
+    end
+
+
+// Rule unit: CBI.17
+rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
+    when
+        $section: Section(!hasTables(), containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
+    then
+        entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
+            .forEach(entity -> {
+                entity.addEngine(Engine.RULE);
+                entity.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section");
+                insert(entity);
+            });
+    end
+
+rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with colon"
+    when
+        $section: Section(!hasTables(), containsString("Species:"), containsString("Source:"))
+    then
+        entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
+            .forEach(entity -> {
+                entity.addEngine(Engine.RULE);
+                entity.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section");
+                insert(entity);
+            });
+    end
+
+
+// Rule unit: CBI.20
+rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)"
+    agenda-group "LOCAL_DICTIONARY_ADDS"
+    when
+        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") // case-insensitive like the other "Vertebrate Study" checks, so "yes"/"YES" cannot select the non-vertebrate branch here
+        $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:"))
+    then
+        entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
+            .forEach(laboratoryEntity -> {
+                laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); // non-vertebrate: the entity is created but skipped, not applied
+                laboratoryEntity.addEngine(Engine.RULE);
+                dictionary.addLocalDictionaryEntry(laboratoryEntity);
+                insert(laboratoryEntity);
+            });
+    end
+
+rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)"
+    agenda-group "LOCAL_DICTIONARY_ADDS"
+    when
+        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") // must mirror the condition of CBI.20.0 so exactly one of the two rules fires
+        $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:"))
+    then
+        entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
+            .forEach(laboratoryEntity -> {
+                laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+                laboratoryEntity.addEngine(Engine.RULE);
+                dictionary.addLocalDictionaryEntry(laboratoryEntity);
+                insert(laboratoryEntity);
+            });
+    end
+
+
+//------------------------------------ PII rules ------------------------------------
+
+// Rule unit: PII.0
+rule "PII.0.0: Redact all PII (non vertebrate study)"
+    when
+        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
+        $pii: RedactionEntity(type == "PII", dictionaryEntry)
+    then
+        $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "PII.0.1: Redact all PII (vertebrate study)"
+    when
+        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
+        $pii: RedactionEntity(type == "PII", dictionaryEntry)
+    then
+        $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+// Rule unit: PII.1
+rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)"
+    when
+        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
+        $section: Section(containsString("@"))
+    then
+        entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII",
EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> { + emailEntity.addEngine(Engine.RULE); + emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + insert(emailEntity); + }); + end + +rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> { + emailEntity.addEngine(Engine.RULE); + emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + insert(emailEntity); + }); + end + + +// Rule unit: PII.2 +rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> { + contactEntity.addEngine(Engine.RULE); + contactEntity.apply("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + insert(contactEntity); + }); + end + +rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + 
containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> { + contactEntity.addEngine(Engine.RULE); + contactEntity.apply("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + insert(contactEntity); + }); + end + + +// Rule unit: PII.9 +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); + insert(authorEntity); + }); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); + insert(authorEntity); + }); + end + +rule "PII.9.2: Redact 
between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); + insert(authorEntity); + }); + end + +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); + insert(authorEntity); + }); + end + + +//------------------------------------ Other rules ------------------------------------ + +// Rule unit: ETC.0 +rule "ETC.0.0: Purity Hint" + when + $section: Section(containsStringIgnoreCase("purity")) + then + entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? 
?%", "hint_only", EntityType.ENTITY, 1, $section) + .forEach(hint -> { + hint.addEngine(Engine.RULE); + hint.skip("ETC.0.0", ""); + }); + end + + +// Rule unit: ETC.2 +rule "ETC.2.0: Redact signatures (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value == "Yes") + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.2.1: Redact signatures (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value == "Yes") + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.apply("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: ETC.3 +rule "ETC.3.0: Redact logos (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value == "Yes") + $logo: Image(imageType == ImageType.LOGO) + then + $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.3.1: Redact logos (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value == "Yes") + $logo: Image(imageType == ImageType.LOGO) + then + $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: ETC.5 +rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" + when + not FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: RedactionEntity(type == "dossier_redaction") + then + $dossierRedaction.setIgnored(true); + update($dossierRedaction); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + + +//------------------------------------ AI rules ------------------------------------ + +// Rule unit: AI.0 +rule "AI.0.0: add all NER Entities of type CBI_author" + salience 999 + when + nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) + then + 
nerEntities.streamEntitiesOfType("CBI_author") + .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> insert(entity)); + end + + +// Rule unit: AI.1 +rule "AI.1.0: combine and add NER Entities as CBI_address" + salience 999 + when + nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) + then + nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) + .map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> { + entity.addEngine(Engine.NER); + insert(entity); + }); + end + + +//------------------------------------ Manual redaction rules ------------------------------------ + +// Rule unit: MAN.0 +rule "MAN.0.0: Apply manual resize redaction" + salience 128 + when + $resizeRedaction: ManualResizeRedaction($id: annotationId) + $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + then + manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + retract($resizeRedaction); + update($entityToBeResized); + $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); + end + + +// Rule unit: MAN.1 +rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" + salience 128 + when + $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + then + $entityToBeRemoved.setIgnored(true); + update($entityToBeRemoved); + retract($idRemoval); + $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "MAN.1.1: Apply 
id removals that are valid and not in forced redactions to Image" + salience 128 + when + $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $imageEntityToBeRemoved: Image($id == id) + then + $imageEntityToBeRemoved.setIgnored(true); + update($imageEntityToBeRemoved); + retract($idRemoval); + update($imageEntityToBeRemoved.getParent()); + end + + +// Rule unit: MAN.2 +rule "MAN.2.0: Apply force redaction" + salience 128 + when + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $entityToForce: RedactionEntity(matchesAnnotationId($id)) + then + $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); + $entityToForce.setRemoved(false); + $entityToForce.setIgnored(false); + $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + update($entityToForce); + $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); + end + + +// Rule unit: MAN.3 +rule "MAN.3.0: Apply image recategorization" + salience 128 + when + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); + update($imageToBeRecategorized); + retract($recategorization); + update($imageToBeRecategorized.getParent()); + end + + +//------------------------------------ Entity merging rules ------------------------------------ + +// Rule unit: X.0 +rule "X.0.0: remove Entity contained by Entity of same type" + salience 65 + when + $larger: RedactionEntity($type: type, $entityType: entityType) + $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, 
!skipRemoveEntitiesContainedInLarger) + then + $contained.remove(); + retract($contained); + end + + +// Rule unit: X.1 +rule "X.1.0: merge intersecting Entities of same type" + salience 64 + when + $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger) + $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger) + then + $first.remove(); + $second.remove(); + RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document); + retract($first); + retract($second); + insert(mergedEntity); + mergedEntity.getIntersectingNodes().forEach(node -> update(node)); + end + + +// Rule unit: X.2 +rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE) + $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger) + then + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove(); + retract($entity) + end + + +// Rule unit: X.3 +rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" + salience 64 + when + $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION) + $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) + then + $recommendation.remove(); + retract($recommendation); + end + + +// Rule unit: X.4 +rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" + salience 256 + when + $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY) + $recommendation: RedactionEntity(intersects($entity), type == 
$type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) + then + $entity.addEngines($recommendation.getEngines()); + $recommendation.remove(); + retract($recommendation); + end + + +// Rule unit: X.5 +rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" + salience 256 + when + $entity: RedactionEntity(entityType == EntityType.ENTITY) + $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) + then + $recommendation.remove(); + retract($recommendation); + end + + +// Rule unit: X.6 +rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" + salience 32 + when + $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY) + $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger) + then + $lowerRank.getIntersectingNodes().forEach(node -> update(node)); + $lowerRank.remove(); + retract($lowerRank); + end + + +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 +rule "FA.1.0: remove duplicate FileAttributes" + salience 64 + when + $fileAttribute: FileAttribute($label: label, $value: value) + $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) + then + retract($duplicate); + end + + +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +rule "LDS.0.0: run local dictionary search" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -999 + when + DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + then + entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, 
document) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + insert(entity); + }); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl index 2e62d4b1..fab5247a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl @@ -132,9 +132,11 @@ rule "CBI.2.0: Don't redact genitive CBI_author" when $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied()) then - RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document); - falsePositive.skip("CBI.2.0", "Genitive Author found"); - insert(falsePositive); + entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document) + .ifPresent(falsePositive -> { + falsePositive.skip("CBI.2.0", "Genitive Author found"); + insert(falsePositive); + }); end @@ -246,7 +248,6 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.addEngine(Engine.RULE); entity.applyWithReferences( "CBI.5.0", "no_redaction_indicator but also redaction_indicator found", @@ -264,10 +265,9 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator")) + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) .filter(entity -> entity.getType().equals("CBI_author") || 
entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.addEngine(Engine.RULE); entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", @@ -290,7 +290,6 @@ rule "CBI.6.0: Don't redact Names and Addresses if vertebrate but also published then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.addEngine(Engine.RULE); entity.skipWithReferences( "CBI.6.0", "vertebrate but also published_information found", @@ -307,10 +306,9 @@ rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published hasEntitiesOfType("published_information"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator")) + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "published_information")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.addEngine(Engine.RULE); entity.skipWithReferences( "CBI.6.1", "vertebrate but also published_information found", @@ -326,8 +324,8 @@ rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables" when $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> { @@ -342,7 +340,7 @@ rule "CBI.7.0: Do not redact Names and Addresses if published information found rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" when $table: 
Table(hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> { @@ -522,7 +520,7 @@ rule "CBI.13.0: Ignore CBI Address Recommendations" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: RedactionEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION) then - $entity.removeFromGraph(); + $entity.remove(); retract($entity) end @@ -655,11 +653,13 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") ) then - RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)"); - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - $entityToExpand.removeFromGraph(); - retract($entityToExpand); - insert(expandedEntity); + entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? 
[A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") + .ifPresent(expandedEntity -> { + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.remove(); + retract($entityToExpand); + insert(expandedEntity); + }); end @@ -668,11 +668,13 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" when $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then - RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - $entityToExpand.removeFromGraph(); - retract($entityToExpand); - insert(expandedEntity); + entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") + .ifPresent(expandedEntity -> { + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.remove(); + retract($entityToExpand); + insert(expandedEntity); + }); end @@ -1143,10 +1145,12 @@ rule "PII.12.0: Expand PII entities with salutation prefix" when $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then - RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - expandedEntity.addEngine(Engine.RULE); - insert(expandedEntity); + entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") + .ifPresent(expandedEntity -> { + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + expandedEntity.addEngine(Engine.RULE); + insert(expandedEntity); + }); end @@ -1229,8 +1233,9 @@ rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confi not 
FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: RedactionEntity(type == "dossier_redaction") then - $dossierRedaction.removeFromGraph(); - retract($dossierRedaction); + $dossierRedaction.setIgnored(true); + update($dossierRedaction); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); end @@ -1290,6 +1295,8 @@ rule "AI.0.0: add all NER Entities of type CBI_author" then nerEntities.streamEntitiesOfType("CBI_author") .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(entity -> insert(entity)); end @@ -1302,6 +1309,8 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" then nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) .map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(entity -> { entity.addEngine(Engine.NER); insert(entity); @@ -1318,6 +1327,8 @@ rule "AI.2.0: add all NER Entities of any type except CBI_author" nerEntities.getNerEntityList().stream() .filter(nerEntity -> !nerEntity.type().equals("CBI_author")) .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, nerEntity.type().toLowerCase(), EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(entity -> insert(entity)); end @@ -1334,6 +1345,7 @@ rule "MAN.0.0: Apply manual resize redaction" manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); update($entityToBeResized); + $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); end @@ -1341,21 +1353,27 @@ rule "MAN.0.0: Apply manual resize redaction" rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when - IdRemoval(status == 
AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) then $entityToBeRemoved.setIgnored(true); + update($entityToBeRemoved); + retract($idRemoval); + $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); end rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when - IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) $imageEntityToBeRemoved: Image($id == id) then $imageEntityToBeRemoved.setIgnored(true); + update($imageEntityToBeRemoved); + retract($idRemoval); + update($imageEntityToBeRemoved.getParent()); end @@ -1363,11 +1381,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to rule "MAN.2.0: Apply force redaction" salience 128 when - ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) $entityToForce: RedactionEntity(matchesAnnotationId($id)) then $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); + $entityToForce.setRemoved(false); + $entityToForce.setIgnored(false); $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + update($entityToForce); + $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); end @@ 
-1375,10 +1398,13 @@ rule "MAN.2.0: Apply force redaction" rule "MAN.3.0: Apply image recategorization" salience 128 when - ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) - $image: Image($id == id) + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $imageToBeRecategorized: Image($id == id) then - $image.setImageType(ImageType.fromString($imageType)); + $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); + update($imageToBeRecategorized); + update($imageToBeRecategorized.getParent()); + retract($recategorization); end @@ -1391,7 +1417,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" $larger: RedactionEntity($type: type, $entityType: entityType) $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger) then - $contained.removeFromGraph(); + $contained.remove(); retract($contained); end @@ -1403,12 +1429,13 @@ rule "X.1.0: merge intersecting Entities of same type" $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger) $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger) then - $first.removeFromGraph(); - $second.removeFromGraph(); + $first.remove(); + $second.remove(); RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document); retract($first); retract($second); insert(mergedEntity); + mergedEntity.getIntersectingNodes().forEach(node -> update(node)); end @@ -1419,7 +1446,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE) $entity: RedactionEntity(containedBy($falsePositive), type == 
$type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger) then - $entity.removeFromGraph(); + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove(); retract($entity) end @@ -1431,7 +1459,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION) $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then - $recommendation.removeFromGraph(); + $recommendation.remove(); retract($recommendation); end @@ -1444,7 +1472,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then $entity.addEngines($recommendation.getEngines()); - $recommendation.removeFromGraph(); + $recommendation.remove(); retract($recommendation); end @@ -1456,7 +1484,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" $entity: RedactionEntity(entityType == EntityType.ENTITY) $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then - $recommendation.removeFromGraph(); + $recommendation.remove(); retract($recommendation); end @@ -1468,7 +1496,8 @@ rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENT $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY) $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger) then - $lowerRank.removeFromGraph(); + $lowerRank.getIntersectingNodes().forEach(node -> update(node)); + 
$lowerRank.remove(); retract($lowerRank); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index e44ff68b..00c9a4d6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -14,6 +14,11 @@ import java.util.Optional; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SectionIdentifier; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Headline; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; @@ -308,7 +313,18 @@ rule "DOC.5.0: Strain" entity.apply("DOC.5.0", "Strain found.", "n-a"); }); end - +rule "DOC.6.0" + when + Headline(containsStringIgnoreCase("materials and methods"), $sectionIdentifierMaterials: getSectionIdentifier()) + Headline(containsStringIgnoreCase("controls"), getSectionIdentifier().isChildOf($sectionIdentifierMaterials), $sectionIdentifierControls: getSectionIdentifier()) + $headline: Headline(containsStringIgnoreCase("positive control substances"), getSectionIdentifier().isChildOf($sectionIdentifierControls)) + 
then + System.out.println($headline); + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY) + .forEach(entity -> { + entity.apply("DOC.6.0", "positive control substance found", "n-a"); + }); + end //rule "DOC.7.0: study title by document structure" // when @@ -328,7 +344,7 @@ rule "DOC.7.0: study title" when $section: Section(isOnPage(1) && (containsString("Final Report") || containsString("SPL"))) then - entityCreationService.byRegexWithLinebreaks("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entityCreationService.byRegexWithLineBreaks("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { entity.apply("DOC.7.0", "Title found", "n-a"); }); entityCreationService.betweenStrings("TITLE", "DATA REQUIREMENT", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { @@ -358,6 +374,8 @@ rule "DOC.8.1: Performing Laboratory (Name)" nerEntities.streamEntitiesOfType("COUNTRY") .filter(nerEntity -> $section.getBoundary().contains(nerEntity.boundary())) .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(entity -> { entity.apply("DOC.8.2", "Performing Laboratory found", "n-a"); insert(entity); @@ -572,8 +590,8 @@ rule "DOC.13.0: Clinical Signs" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_signs", EntityType.ENTITY, $section); - entity.apply("DOC.13.0", "Clinical Signs found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a")); end @@ -591,7 +609,7 @@ rule "DOC.14.0: 
Dosages" entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { entity.apply("DOC.14.0", "Dosage found", "n-a"); }); - entityCreationService.byRegexWithLinebreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> { + entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> { entity.apply("DOC.14.0", "Dosage found", "n-a"); }); end @@ -602,8 +620,8 @@ rule "DOC.15.0: Mortality" $headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs()) FileAttribute(label == "OECD Number", value == "425") then - var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent()); - entity.apply("DOC.15.0", "Mortality found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a")); end @@ -615,8 +633,8 @@ rule "DOC.17.0: Study Conclusion" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section); - entity.apply("DOC.17.0", "Study Conclusion found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a")); end @@ -634,8 +652,8 @@ rule "DOC.18.0: 
Weight Behavior Changes" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section); - entity.apply("DOC.18.0", "Weight behavior changes found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a")); end @@ -653,8 +671,8 @@ rule "DOC.19.0: Necropsy findings" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "necropsy_findings", EntityType.ENTITY, $section); - entity.apply("DOC.19.0", "Necropsy section found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) + .forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a")); end @@ -673,8 +691,8 @@ rule "DOC.22.0: Clinical observations" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_observations", EntityType.ENTITY, $section); - entity.apply("DOC.22.0", "Clinical observations section found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a")); end @@ -730,8 +748,8 @@ rule "DOC.23.0: Bodyweight changes" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "bodyweight_changes", EntityType.ENTITY, $section); - entity.apply("DOC.23.0", "Bodyweight section found", 
"n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a")); end @@ -743,8 +761,8 @@ rule "DOC.24.0: Study Design" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_design", EntityType.ENTITY, $section); - entity.apply("DOC.24.0", "Study design section found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a")); end @@ -765,8 +783,8 @@ rule "DOC.25.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "results_and_conclusion", EntityType.ENTITY, $section); - entity.apply("DOC.25.0", "Results and Conclusion found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "results_and_conclusion", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a")); end @@ -800,8 +818,8 @@ rule "DOC.32.0: Preliminary Test Results (429)" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "preliminary_test_results", EntityType.ENTITY, $section); - entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a")); end @@ -810,8 +828,8 @@ rule "DOC.33.0: Test Results (429)" FileAttribute(label == "OECD 
Number", value == "429") $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")) && hasParagraphs()) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "test_results", EntityType.ENTITY, $section); - entity.apply("DOC.33.0", "Test Results found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a")); end @@ -946,8 +964,8 @@ rule "DOC.39.0: Dilution of the test substance" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "dilution", EntityType.ENTITY, $section); - entity.apply("DOC.39.0", "Dilution found.", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", "n-a")); end @@ -960,8 +978,8 @@ rule "DOC.40.0: Positive Control" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "positive_control", EntityType.ENTITY, $section); - entity.apply("DOC.40.0", "Positive control found.", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a")); end @@ -970,8 +988,8 @@ rule "DOC.42.0: Mortality Statement" FileAttribute(label == "OECD Number", value == "402") $headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs()) then - 
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent()); - entity.apply("DOC.42.0", "Mortality Statement found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a")); end @@ -1043,8 +1061,8 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)" && hasParagraphs() ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "doses_(mg_kg_bw)", EntityType.ENTITY, $section); - entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a"); + entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a")); end @@ -1090,11 +1108,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to rule "MAN.2.0: Apply force redaction" salience 128 when - ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) $entityToForce: RedactionEntity(matchesAnnotationId($id)) then $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); + $entityToForce.setRemoved(false); + $entityToForce.setIgnored(false); $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + update($entityToForce); + $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 57c5e799..ddabbc57 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -184,7 +184,6 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.addEngine(Engine.RULE); entity.applyWithReferences( "CBI.5.0", "no_redaction_indicator but also redaction_indicator found", @@ -202,10 +201,9 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator")) + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.addEngine(Engine.RULE); entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", @@ -471,11 +469,13 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") ) then - RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)"); - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - $entityToExpand.removeFromGraph(); - retract($entityToExpand); - insert(expandedEntity); + entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? 
[A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") + .ifPresent(expandedEntity -> { + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.remove(); + retract($entityToExpand); + insert(expandedEntity); + }); end @@ -484,11 +484,13 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" when $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then - RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - $entityToExpand.removeFromGraph(); - retract($entityToExpand); - insert(expandedEntity); + entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") + .ifPresent(expandedEntity -> { + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.remove(); + retract($entityToExpand); + insert(expandedEntity); + }); end @@ -836,10 +838,12 @@ rule "PII.12.0: Expand PII entities with salutation prefix" when $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then - RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - expandedEntity.addEngine(Engine.RULE); - insert(expandedEntity); + entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") + .ifPresent(expandedEntity -> { + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + expandedEntity.addEngine(Engine.RULE); + insert(expandedEntity); + }); end @@ -909,8 +913,9 @@ rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confi not 
FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: RedactionEntity(type == "dossier_redaction") then - $dossierRedaction.removeFromGraph(); - retract($dossierRedaction); + $dossierRedaction.setIgnored(true); + update($dossierRedaction); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); end @@ -970,6 +975,8 @@ rule "AI.0.0: add all NER Entities of type CBI_author" then nerEntities.streamEntitiesOfType("CBI_author") .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(entity -> insert(entity)); end @@ -982,6 +989,8 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" then nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) .map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(entity -> { entity.addEngine(Engine.NER); insert(entity); @@ -1001,6 +1010,7 @@ rule "MAN.0.0: Apply manual resize redaction" manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); update($entityToBeResized); + $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); end @@ -1008,21 +1018,27 @@ rule "MAN.0.0: Apply manual resize redaction" rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when - IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) then $entityToBeRemoved.setIgnored(true); + 
update($entityToBeRemoved); + retract($idRemoval); + $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); end rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when - IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) $imageEntityToBeRemoved: Image($id == id) then $imageEntityToBeRemoved.setIgnored(true); + update($imageEntityToBeRemoved); + retract($idRemoval); + update($imageEntityToBeRemoved.getParent()); end @@ -1030,11 +1046,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to rule "MAN.2.0: Apply force redaction" salience 128 when - ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) $entityToForce: RedactionEntity(matchesAnnotationId($id)) then $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); + $entityToForce.setRemoved(false); + $entityToForce.setIgnored(false); $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + update($entityToForce); + $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); end @@ -1042,10 +1063,13 @@ rule "MAN.2.0: Apply force redaction" rule "MAN.3.0: Apply image recategorization" salience 128 when - ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) - $image: Image($id == id) + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $imageToBeRecategorized: Image($id == id) 
then - $image.setImageType(ImageType.fromString($imageType)); + $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); + update($imageToBeRecategorized); + retract($recategorization); + update($imageToBeRecategorized.getParent()); end @@ -1058,7 +1082,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" $larger: RedactionEntity($type: type, $entityType: entityType) $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger) then - $contained.removeFromGraph(); + $contained.remove(); retract($contained); end @@ -1070,12 +1094,13 @@ rule "X.1.0: merge intersecting Entities of same type" $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger) $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger) then - $first.removeFromGraph(); - $second.removeFromGraph(); + $first.remove(); + $second.remove(); RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document); retract($first); retract($second); insert(mergedEntity); + mergedEntity.getIntersectingNodes().forEach(node -> update(node)); end @@ -1086,7 +1111,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE) $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger) then - $entity.removeFromGraph(); + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove(); retract($entity) end @@ -1098,7 +1124,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM $falseRecommendation: RedactionEntity($type: type, entityType == 
EntityType.FALSE_RECOMMENDATION) $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then - $recommendation.removeFromGraph(); + $recommendation.remove(); retract($recommendation); end @@ -1111,7 +1137,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then $entity.addEngines($recommendation.getEngines()); - $recommendation.removeFromGraph(); + $recommendation.remove(); retract($recommendation); end @@ -1123,7 +1149,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" $entity: RedactionEntity(entityType == EntityType.ENTITY) $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then - $recommendation.removeFromGraph(); + $recommendation.remove(); retract($recommendation); end @@ -1135,7 +1161,8 @@ rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENT $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY) $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger) then - $lowerRank.removeFromGraph(); + $lowerRank.getIntersectingNodes().forEach(node -> update(node)); + $lowerRank.remove(); retract($lowerRank); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index 34382527..2c1a72c8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -56,9 +56,11 @@ rule "add NER Entities of type CBI_author or CBI_address" when $nerEntity: EntityRecognitionEntity($type: type, (type == "CBI_author" || type == "CBI_address")) then - RedactionEntity redactionEntity = entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document); - redactionEntity.addEngine(Engine.NER); - insert(redactionEntity); + entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document) + .ifPresent(redactionEntity -> { + redactionEntity.addEngine(Engine.NER); + insert(redactionEntity); + }); end // --------------------------------------- CBI rules ------------------------------------------------------------------- @@ -81,91 +83,126 @@ rule "Always redact PII" $cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end -// --------------------------------------- merging rules ------------------------------------------------------------------- +//------------------------------------ Entity merging rules ------------------------------------ -rule "remove Entity contained by Entity of same type" +// Rule unit: X.0 +rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when $larger: RedactionEntity($type: type, $entityType: entityType) $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger) then - $contained.removeFromGraph(); + $contained.remove(); retract($contained); end -rule "merge intersecting Entities of same type" + +// Rule unit: X.1 +rule "X.1.0: merge intersecting Entities of same type" salience 64 when $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger) $second: 
RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger) then - $first.removeFromGraph(); - $second.removeFromGraph(); + $first.remove(); + $second.remove(); RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document); retract($first); retract($second); insert(mergedEntity); + mergedEntity.getIntersectingNodes().forEach(node -> update(node)); end -rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE" + +// Rule unit: X.2 +rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE) $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger) then - $entity.removeFromGraph(); + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove(); retract($entity) end -rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" + +// Rule unit: X.3 +rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION) $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then - $recommendation.removeFromGraph(); + $recommendation.remove(); retract($recommendation); end -rule "remove Entity of type RECOMMENDATION when contained by ENTITY" - salience 64 + +// Rule unit: X.4 +rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" + salience 256 when $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY) - $recommendation: RedactionEntity(containedBy($entity), type == $type, 
entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) + $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then - $recommendation.removeFromGraph(); + $entity.addEngines($recommendation.getEngines()); + $recommendation.remove(); retract($recommendation); end -rule "remove Entity of lower rank, when equal boundaries and entityType" + +// Rule unit: X.5 +rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" + salience 256 + when + $entity: RedactionEntity(entityType == EntityType.ENTITY) + $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) + then + $recommendation.remove(); + retract($recommendation); + end + + +// Rule unit: X.6 +rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" salience 32 when - $higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary) - $lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !applied) + $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY) + $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger) then - $lowerRank.removeFromGraph(); + $lowerRank.getIntersectingNodes().forEach(node -> update(node)); + $lowerRank.remove(); retract($lowerRank); end -// --------------------------------------- FileAttribute Rules ------------------------------------------------------------------- -rule "remove duplicate FileAttributes" +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 +rule "FA.1.0: 
remove duplicate FileAttributes" salience 64 when - $first: FileAttribute($label: label, $value: value) - $second: FileAttribute(this != $first, label == $label, value == $value) + $fileAttribute: FileAttribute($label: label, $value: value) + $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) then - retract($second); + retract($duplicate); end -// --------------------------------------- local dictionary search ------------------------------------------------------------------- -rule "run local dictionary search" +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() then entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document) - .forEach(redactionEntity -> insert(redactionEntity)); + .forEach(entity -> { + entity.addEngine(Engine.RULE); + insert(entity); + }); end +