From ecbc65636502b3a485845143bd9a19e7a75745d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Thu, 31 Aug 2023 15:23:42 +0200 Subject: [PATCH] RED-7317: fix behavior of recategorize --- .../build.gradle.kts | 3 +- .../graph/ConsecutiveBoundaryCollector.java | 20 +- .../graph/{Boundary.java => TextRange.java} | 42 +-- .../{MatchedRuleHolder.java => Entity.java} | 79 +++-- .../graph/entity/ManualChangeOverwrite.java | 200 +++++++++++ .../document/graph/entity/MatchedRule.java | 50 ++- ...ctionPosition.java => PositionOnPage.java} | 7 +- .../{RedactionEntity.java => TextEntity.java} | 65 ++-- .../server/document/graph/nodes/Document.java | 4 +- .../server/document/graph/nodes/Footer.java | 4 +- .../server/document/graph/nodes/Header.java | 4 +- .../server/document/graph/nodes/Headline.java | 4 +- .../v1/server/document/graph/nodes/Image.java | 15 +- .../v1/server/document/graph/nodes/Page.java | 5 +- .../document/graph/nodes/Paragraph.java | 4 +- .../server/document/graph/nodes/Section.java | 4 +- .../document/graph/nodes/SemanticNode.java | 66 ++-- .../v1/server/document/graph/nodes/Table.java | 26 +- .../document/graph/nodes/TableCell.java | 4 +- .../graph/textblock/AtomicTextBlock.java | 64 ++-- .../textblock/ConcatenatedTextBlock.java | 66 ++-- .../document/graph/textblock/TextBlock.java | 50 +-- .../services/EntityCreationService.java | 267 ++++++++------- .../services/EntityEnrichmentService.java | 14 +- .../ManualChangesApplicationService.java | 95 ++++++ .../ManualRedactionApplicationService.java | 64 ---- .../utils/RectangleTransformations.java | 12 - .../utils/RedactionSearchUtility.java | 68 ++-- .../adapter/CustomEntityCreationAdapter.java | 108 +++--- .../server/redaction/adapter/NerEntities.java | 4 +- .../redaction/adapter/NerEntitiesAdapter.java | 36 +- .../redaction/model/EntityIdentifier.java | 73 ---- .../server/redaction/model/ManualEntity.java | 91 +++++ .../model/dictionary/Dictionary.java | 16 +- 
.../dictionary/SearchImplementation.java | 14 +- .../redaction/service/AnalyzeService.java | 39 ++- .../service/DroolsExecutionService.java | 6 +- .../service/EntityRedactionService.java | 12 +- .../service/ManualChangeFactory.java | 43 +++ .../service/ManualRedactionEntryService.java | 58 ++++ .../service/RedactionLogCreatorService.java | 182 ++++++---- .../redaction/utils/SeparatorUtils.java | 28 +- .../v1/server/RedactionIntegrationTest.java | 166 +-------- .../v1/server/annotate/AnnotationService.java | 3 - ...ionEntityTest.java => TextEntityTest.java} | 23 +- .../server/document/graph/BoundaryTest.java | 85 ----- ...ocumentEntityInsertionIntegrationTest.java | 162 ++++----- .../DocumentPerformanceIntegrationTest.java | 24 +- .../document/graph/MigrationPocTest.java | 7 +- .../graph/SearchImplementationTest.java | 4 +- .../server/document/graph/TextRangeTest.java | 85 +++++ .../CustomEntityCreationAdapterTest.java} | 31 +- .../ManualChangesEnd2EndTest.java | 323 ++++++++++++++++++ .../ManualChangesIntegrationTest.java} | 100 +++--- .../manualchanges/ManualChangesUnitTest.java | 134 ++++++++ .../AnalyseFileRealDataIntegrationTest.java | 34 +- .../adapter/NerEntitiesAdapterTest.java | 32 +- .../service/DroolsExecutionServiceTest.java | 13 + .../resources/drools/acceptance_rules.drl | 102 +++--- .../src/test/resources/drools/all_rules.drl | 155 +++++---- .../test/resources/drools/documine_flora.drl | 26 +- .../drools/manual_redaction_rules.drl | 92 +++-- .../src/test/resources/drools/rules.drl | 141 ++++---- .../src/test/resources/drools/rules_v2.drl | 48 +-- .../EFSA_sanitisation_GFL_v1/rules.drl | 154 ++++++--- 65 files changed, 2411 insertions(+), 1549 deletions(-) rename redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/{Boundary.java => TextRange.java} (68%) rename 
redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/{MatchedRuleHolder.java => Entity.java} (69%) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/ManualChangeOverwrite.java rename redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/{RedactionPosition.java => PositionOnPage.java} (74%) rename redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/{RedactionEntity.java => TextEntity.java} (68%) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualChangesApplicationService.java delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualRedactionApplicationService.java delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ManualEntity.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionEntryService.java rename redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/{RedactionEntityTest.java => TextEntityTest.java} (66%) delete mode 100644 
redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TextRangeTest.java rename redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/{document/graph/ManualRedactionEntryTest.java => manualchanges/CustomEntityCreationAdapterTest.java} (81%) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java rename redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/{document/graph/ManualResizeRedactionIntegrationTest.java => manualchanges/ManualChangesIntegrationTest.java} (66%) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 2fc24723..e3c1b5a7 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -16,6 +16,7 @@ val layoutParserVersion = "0.25.0" val jacksonVersion = "2.15.2" val droolsVersion = "8.43.0.Final" val pdfBoxVersion = "3.0.0-alpha2" +val persistenceServiceVersion = "2.155.0" configurations { all { @@ -26,7 +27,7 @@ configurations { dependencies { implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") } - implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.119.0") { exclude(group = "org.springframework.boot") } + 
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") } implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}") implementation("com.iqser.red.commons:spring-commons:2.7.0") diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/ConsecutiveBoundaryCollector.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/ConsecutiveBoundaryCollector.java index aceb9bd7..a77430a6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/ConsecutiveBoundaryCollector.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/ConsecutiveBoundaryCollector.java @@ -11,17 +11,17 @@ import java.util.stream.Collector; import com.google.common.base.Functions; -public class ConsecutiveBoundaryCollector implements Collector, List> { +public class ConsecutiveBoundaryCollector implements Collector, List> { @Override - public Supplier> supplier() { + public Supplier> supplier() { return LinkedList::new; } @Override - public BiConsumer, Boundary> accumulator() { + public BiConsumer, TextRange> accumulator() { return (existingList, boundary) -> { if (existingList.isEmpty()) { @@ -29,14 +29,14 @@ public class ConsecutiveBoundaryCollector implements Collector boundary.start()) { - throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", prevBoundary, boundary)); + TextRange prevTextRange = existingList.get(existingList.size() - 1); + if (prevTextRange.end() > boundary.start()) { + throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. 
Boundaries must be ordered!", prevTextRange, boundary)); } - if (prevBoundary.end() == boundary.start()) { + if (prevTextRange.end() == boundary.start()) { existingList.remove(existingList.size() - 1); - existingList.add(Boundary.merge(List.of(prevBoundary, boundary))); + existingList.add(TextRange.merge(List.of(prevTextRange, boundary))); } else { existingList.add(boundary); } @@ -45,7 +45,7 @@ public class ConsecutiveBoundaryCollector implements Collector> combiner() { + public BinaryOperator> combiner() { return (list1, list2) -> { list1.addAll(list2); @@ -55,7 +55,7 @@ public class ConsecutiveBoundaryCollector implements Collector, List> finisher() { + public Function, List> finisher() { return Functions.identity(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/Boundary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/TextRange.java similarity index 68% rename from redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/Boundary.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/TextRange.java index 31116bc5..6b624e60 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/Boundary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/TextRange.java @@ -13,13 +13,13 @@ import lombok.Setter; @Setter @EqualsAndHashCode -public class Boundary implements Comparable { +public class TextRange implements Comparable { private int start; private int end; - public Boundary(int start, int end) { + public TextRange(int start, int end) { if (start > end) { throw new IllegalArgumentException(format("start: %d > 
end: %d", start, end)); @@ -47,15 +47,15 @@ public class Boundary implements Comparable { } - public boolean contains(Boundary boundary) { + public boolean contains(TextRange textRange) { - return start <= boundary.start() && boundary.end() <= end; + return start <= textRange.start() && textRange.end() <= end; } - public boolean containedBy(Boundary boundary) { + public boolean containedBy(TextRange textRange) { - return boundary.contains(this); + return textRange.contains(this); } @@ -83,18 +83,18 @@ public class Boundary implements Comparable { } - public boolean intersects(Boundary boundary) { + public boolean intersects(TextRange textRange) { - return boundary.start() < this.end && this.start < boundary.end(); + return textRange.start() < this.end && this.start < textRange.end(); } - public List split(List splitIndices) { + public List split(List splitIndices) { if (splitIndices.stream().anyMatch(idx -> !this.contains(idx))) { throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s", splitIndices.stream().filter(idx -> !this.contains(idx)).toList(), this)); } - List splitBoundaries = new LinkedList<>(); + List splitBoundaries = new LinkedList<>(); int previousIndex = start; for (int splitIndex : splitIndices) { @@ -102,19 +102,19 @@ public class Boundary implements Comparable { if (splitIndex == previousIndex) { continue; } - splitBoundaries.add(new Boundary(previousIndex, splitIndex)); + splitBoundaries.add(new TextRange(previousIndex, splitIndex)); previousIndex = splitIndex; } - splitBoundaries.add(new Boundary(previousIndex, end)); + splitBoundaries.add(new TextRange(previousIndex, end)); return splitBoundaries; } - public static Boundary merge(Collection boundaries) { + public static TextRange merge(Collection boundaries) { - int minStart = boundaries.stream().mapToInt(Boundary::start).min().orElseThrow(IllegalArgumentException::new); - int maxEnd = 
boundaries.stream().mapToInt(Boundary::end).max().orElseThrow(IllegalArgumentException::new); - return new Boundary(minStart, maxEnd); + int minStart = boundaries.stream().mapToInt(TextRange::start).min().orElseThrow(IllegalArgumentException::new); + int maxEnd = boundaries.stream().mapToInt(TextRange::end).max().orElseThrow(IllegalArgumentException::new); + return new TextRange(minStart, maxEnd); } @@ -126,12 +126,12 @@ public class Boundary implements Comparable { @Override - public int compareTo(Boundary boundary) { + public int compareTo(TextRange textRange) { - if (end < boundary.end() && start < boundary.start()) { + if (end < textRange.end() && start < textRange.start()) { return -1; } - if (start > boundary.start() && end > boundary.end()) { + if (start > textRange.start() && end > textRange.end()) { return 1; } @@ -145,7 +145,7 @@ public class Boundary implements Comparable { * @param textBlock TextBlock to check whitespaces against * @return trimmed boundary */ - public Boundary trim(TextBlock textBlock) { + public TextRange trim(TextBlock textBlock) { if (this.length() == 0) { return this; @@ -160,7 +160,7 @@ public class Boundary implements Comparable { trimmedEnd--; } - return new Boundary(trimmedStart, Math.max(trimmedEnd, trimmedStart)); + return new TextRange(trimmedStart, Math.max(trimmedEnd, trimmedStart)); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/MatchedRuleHolder.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/Entity.java similarity index 69% rename from redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/MatchedRuleHolder.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/Entity.java index 
8cfdc1ca..d35b939e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/MatchedRuleHolder.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/Entity.java @@ -7,38 +7,54 @@ import java.util.Set; import lombok.NonNull; -public interface MatchedRuleHolder { +public interface Entity { PriorityQueue getMatchedRuleList(); - boolean isIgnored(); + ManualChangeOverwrite getManualOverwrite(); - boolean isRemoved(); + // Don't use default accessor pattern (e.g. isIgnored()), as it might lead to errors in drools due to property-specific optimization of the drools planner. + default boolean ignored() { - - void setIgnored(boolean ignored); - - - void setRemoved(boolean ignored); - - - default boolean isApplied() { - - return getMatchedRule().isApplied(); + return getManualOverwrite().getIgnored().orElse(getMatchedRule().isIgnored()); } - default Set getReferences() { + default boolean removed() { + + return getManualOverwrite().getRemoved().orElse(getMatchedRule().isRemoved()); + } + + + default boolean resized() { + + return getManualOverwrite().getResized().orElse(false); + } + + + default boolean applied() { + + return getManualOverwrite().getApplied().orElse(getMatchedRule().isApplied()); + } + + + default boolean hasManualChanges() { + + return !getManualOverwrite().getManualChangeLog().isEmpty(); + } + + + default Set references() { return getMatchedRule().getReferences(); } - default boolean isActive() { + default boolean active() { - return !(isRemoved() || isIgnored()); + return !(removed() || ignored()); } @@ -82,15 +98,13 @@ public interface MatchedRuleHolder { default void remove(String ruleIdentifier, String reason) { - addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build()); - setRemoved(true); + 
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).removed(true).build()); } default void ignore(String ruleIdentifier, String reason) { - addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build()); - setIgnored(true); + addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).ignored(true).build()); } @@ -121,7 +135,7 @@ public interface MatchedRuleHolder { } - default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection references) { + default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection references) { if (legalBasis.isBlank() || legalBasis.isEmpty()) { throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); @@ -136,7 +150,7 @@ public interface MatchedRuleHolder { } - default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection references) { + default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection references) { getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).references(new HashSet<>(references)).build()); } @@ -150,6 +164,9 @@ public interface MatchedRuleHolder { default void addMatchedRules(Collection matchedRules) { + if (getMatchedRuleList().equals(matchedRules)) { + return; + } getMatchedRuleList().addAll(matchedRules); } @@ -168,4 +185,22 @@ public interface MatchedRuleHolder { return getMatchedRuleList().peek(); } + + default String buildReasonWithManualChangeDescriptions() { + + if (getManualOverwrite().getDescriptions().isEmpty()) { + return getMatchedRule().getReason(); + } + if (getMatchedRule().getReason().isEmpty()) { + return String.join(", ", getManualOverwrite().getDescriptions()); + } + 
return getMatchedRule().getReason() + ", " + String.join(", ", getManualOverwrite().getDescriptions()); + } + + + default String legalBasis() { + + return getManualOverwrite().getLegalBasis().orElse(getMatchedRule().getLegalBasis()); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/ManualChangeOverwrite.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/ManualChangeOverwrite.java new file mode 100644 index 00000000..d7c267eb --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/ManualChangeOverwrite.java @@ -0,0 +1,200 @@ +package com.iqser.red.service.redaction.v1.server.document.graph.entity; + +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.NoArgsConstructor; +import 
lombok.experimental.FieldDefaults; + +@Builder +@NoArgsConstructor +@AllArgsConstructor +@FieldDefaults(level = AccessLevel.PRIVATE) +public class ManualChangeOverwrite { + + private static final Map, String> MANUAL_CHANGE_DESCRIPTIONS = Map.of(// + ManualRedactionEntry.class, "created by manual change", // + ManualLegalBasisChange.class, "legal basis was manually changed", // + ManualResizeRedaction.class, "resized by manual override", // + ManualForceRedaction.class, "forced by manual override", // + IdRemoval.class, "removed by manual override", // + ManualImageRecategorization.class, "recategorized by manual override"); + + List manualChanges = new LinkedList<>(); + boolean changed; + List descriptions; + String type; + String legalBasis; + String section; + String value; + Boolean applied; + Boolean removed; + Boolean ignored; + Boolean resized; + Boolean recategorized; + + + public void calculateCurrentOverride() { + + if (!changed) { + return; + } + List sortedManualChanges = getManualChangeLog(); + updateFields(sortedManualChanges); + } + + + public List getManualChangeLog() { + + if (!changed) { + return manualChanges; + } + manualChanges.sort(Comparator.comparing(BaseAnnotation::getRequestDate)); + updateFields(manualChanges); + // make list unmodifiable. + return manualChanges.stream().toList(); + } + + + private void updateFields(List sortedManualChanges) { + + descriptions = new LinkedList<>(); + + for (BaseAnnotation manualChange : sortedManualChanges) { + // ManualRedactionEntries are created prior to rule execution in analysis service. 
+ + if (manualChange instanceof IdRemoval) { + applied = false; + ignored = true; + } + + if (manualChange instanceof ManualForceRedaction manualForceRedaction) { + removed = false; + ignored = false; + applied = true; + legalBasis = manualForceRedaction.getLegalBasis(); + } + + if (manualChange instanceof ManualLegalBasisChange manualLegalBasisChange) { + section = manualLegalBasisChange.getSection(); + legalBasis = manualLegalBasisChange.getLegalBasis(); + value = manualLegalBasisChange.getValue(); + } + + if (manualChange instanceof ManualResizeRedaction) { + // resizing logic happens in ManualChangesApplicationService. + resized = true; + } + + if (manualChange instanceof ManualImageRecategorization recategorization) { + // recategorization logic happens in ManualChangesApplicationService. + recategorized = true; + // this is only relevant for ManualEntities. Image and TextEntity is recategorized in the ManualChangesApplicationService. + type = recategorization.getType(); + } + + descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass())); + } + changed = false; + } + + + public void addChange(BaseAnnotation manualChange) { + + changed = true; + manualChanges.add(manualChange); + } + + + public void addChanges(List manualChangeLog) { + + changed = true; + manualChanges.addAll(manualChangeLog); + } + + + public Optional getLegalBasis() { + + calculateCurrentOverride(); + return legalBasis == null ? Optional.empty() : Optional.of(legalBasis); + } + + + public Optional getType() { + + calculateCurrentOverride(); + return type == null ? Optional.empty() : Optional.of(type); + } + + + public Optional getSection() { + + calculateCurrentOverride(); + return section == null ? Optional.empty() : Optional.of(section); + } + + + public Optional getValue() { + + calculateCurrentOverride(); + return value == null ? Optional.empty() : Optional.of(value); + } + + + public Optional getApplied() { + + calculateCurrentOverride(); + return applied == null ? 
Optional.empty() : Optional.of(applied); + } + + + public Optional getRemoved() { + + calculateCurrentOverride(); + return removed == null ? Optional.empty() : Optional.of(removed); + } + + + public Optional getIgnored() { + + calculateCurrentOverride(); + return ignored == null ? Optional.empty() : Optional.of(ignored); + } + + + public Optional getResized() { + + calculateCurrentOverride(); + return resized == null ? Optional.empty() : Optional.of(resized); + } + + + public Optional getRecategorized() { + + calculateCurrentOverride(); + return recategorized == null ? Optional.empty() : Optional.of(recategorized); + } + + + public List getDescriptions() { + + calculateCurrentOverride(); + return descriptions == null ? Collections.emptyList() : descriptions; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/MatchedRule.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/MatchedRule.java index ee87c15d..107debb6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/MatchedRule.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/MatchedRule.java @@ -1,6 +1,7 @@ package com.iqser.red.service.redaction.v1.server.document.graph.entity; import java.util.Collections; +import java.util.List; import java.util.Objects; import java.util.Set; @@ -18,6 +19,10 @@ import lombok.experimental.FieldDefaults; @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public final class MatchedRule implements Comparable { + public static final String FINAL_TYPE = "FINAL"; + public static final String ELIMINATION_RULE_TYPE = "X"; + private static final List RULE_TYPE_PRIORITIES = List.of(FINAL_TYPE, ELIMINATION_RULE_TYPE); + 
@Builder.Default RuleIdentifier ruleIdentifier = RuleIdentifier.empty(); @Builder.Default @@ -26,8 +31,11 @@ public final class MatchedRule implements Comparable { String legalBasis = ""; boolean applied; boolean writeValueWithLineBreaks; + boolean removed; + boolean ignored; + boolean resized; @Builder.Default - Set references = Collections.emptySet(); + Set references = Collections.emptySet(); public static MatchedRule empty() { @@ -39,32 +47,40 @@ public final class MatchedRule implements Comparable { @Override public int compareTo(MatchedRule matchedRule) { + // Only the highest ranked rule is actually applied, this method defines the highest order. + // First, it compares the Rule Type, RULE_TYPE_PRIORITIES defines the order of types. + // Types not in the list have the lowest priority. + // The ones in the list are technical exceptions and should override any other Rule. + // Aside from them Entities should never match from more than one type! + // E.g. a CBI_author entity should **always** only match CBI.*.* rules. + // Otherwise, something went wrong with the rules. 
:) RuleIdentifier otherRuleIdentifier = matchedRule.getRuleIdentifier(); - if (!Objects.equals(ruleIdentifier.type(), otherRuleIdentifier.type())) { - if (Objects.equals(otherRuleIdentifier.type(), "MAN")) { - return 1; - } - if (Objects.equals(ruleIdentifier.type(), "MAN")) { - return -1; - } - if (Objects.equals(otherRuleIdentifier.type(), "X")) { - return 1; - } - if (Objects.equals(ruleIdentifier.type(), "X")) { - return -1; - } + boolean thisInList = RULE_TYPE_PRIORITIES.contains(this.getRuleIdentifier().type()); + boolean otherInList = RULE_TYPE_PRIORITIES.contains(otherRuleIdentifier.type()); + + // Compare the types + if (thisInList && !otherInList) { + return -1; + } else if (!thisInList && otherInList) { + return 1; + } else if (thisInList && otherInList) { + int thisIndex = RULE_TYPE_PRIORITIES.indexOf(this.getRuleIdentifier().type()); + int otherIndex = RULE_TYPE_PRIORITIES.indexOf(otherRuleIdentifier.type()); + return Integer.compare(thisIndex, otherIndex); } + // Then compare the unit if (!Objects.equals(otherRuleIdentifier.unit(), getRuleIdentifier().unit())) { - return otherRuleIdentifier.unit() - ruleIdentifier.unit(); + return Integer.compare(otherRuleIdentifier.unit(), ruleIdentifier.unit()); } - return otherRuleIdentifier.id() - ruleIdentifier.id(); + // Then compare the id inside the unit + return Integer.compare(otherRuleIdentifier.id(), ruleIdentifier.id()); } @Override public String toString() { - return "MatchedRule[" + "ruleIdentifier=" + ruleIdentifier + ", " + "reason=" + reason + ", " + "legalBasis=" + legalBasis + ", " + "applied=" + applied + ", " + "writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", " + "references=" + references + ']'; + return "MatchedRule[ruleIdentifier=" + ruleIdentifier + ", reason=" + reason + ", legalBasis=" + legalBasis + ", applied=" + applied + ", writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", references=" + references + ']'; } } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/RedactionPosition.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/PositionOnPage.java similarity index 74% rename from redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/RedactionPosition.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/PositionOnPage.java index 726fb32b..195f3774 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/RedactionPosition.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/PositionOnPage.java @@ -13,12 +13,13 @@ import lombok.experimental.FieldDefaults; @Data @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) -public class RedactionPosition { +public class PositionOnPage { + // Each entry in this list corresponds to an entry in the redaction log, this means: + // A single entity might be represented by multiple redaction log entries + // This is due to the RedactionLog only being able to handle a single page per entry. 
final String id; Page page; - // Each entry in this list corresponds to an entry in the redaction log, this means: - // An entity might be represented by multiple redaction log entries List rectanglePerLine; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/RedactionEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/TextEntity.java similarity index 68% rename from redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/RedactionEntity.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/TextEntity.java index ed30acf3..b2615bc9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/RedactionEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/entity/TextEntity.java @@ -11,9 +11,9 @@ import java.util.PriorityQueue; import java.util.Set; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; +import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import lombok.AccessLevel; @@ -28,29 +28,28 @@ import lombok.experimental.FieldDefaults; @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) 
@EqualsAndHashCode(onlyExplicitlyIncluded = true) -public class RedactionEntity implements MatchedRuleHolder { +public class TextEntity implements Entity { - // initial values + // primary key @EqualsAndHashCode.Include - final Boundary boundary; + final TextRange textRange; @EqualsAndHashCode.Include final String type; @EqualsAndHashCode.Include final EntityType entityType; + // primary key end - // empty defaults - boolean removed; - boolean ignored; + @Builder.Default + final PriorityQueue matchedRuleList = new PriorityQueue<>(); + @Builder.Default + final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite(); - boolean resized; - boolean skipRemoveEntitiesContainedInLarger; boolean dictionaryEntry; boolean dossierDictionaryEntry; + @Builder.Default Set engines = new HashSet<>(); - @Builder.Default - PriorityQueue matchedRuleList = new PriorityQueue<>(); // inferred on graph insertion String value; @@ -58,15 +57,15 @@ public class RedactionEntity implements MatchedRuleHolder { String textAfter; @Builder.Default Set pages = new HashSet<>(); - List redactionPositionsPerPage; + List positionsOnPagePerPage; @Builder.Default List intersectingNodes = new LinkedList<>(); SemanticNode deepestFullyContainingNode; - public static RedactionEntity initialEntityNode(Boundary boundary, String type, EntityType entityType) { + public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType) { - return RedactionEntity.builder().type(type).entityType(entityType).boundary(boundary).build(); + return TextEntity.builder().type(type).entityType(entityType).textRange(textRange).build(); } @@ -102,7 +101,7 @@ public class RedactionEntity implements MatchedRuleHolder { public String getValueWithLineBreaks() { - return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getBoundary()); + return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange()); } @@ -113,14 +112,14 @@ public class 
RedactionEntity implements MatchedRuleHolder { intersectingNodes = new LinkedList<>(); deepestFullyContainingNode = null; pages = new HashSet<>(); - removed = true; + remove("FINAL.0.0", "removed completely"); } - public List getRedactionPositionsPerPage() { + public List getPositionsOnPagePerPage() { - if (redactionPositionsPerPage == null || redactionPositionsPerPage.isEmpty()) { - Map> rectanglesPerLinePerPage = deepestFullyContainingNode.getTextBlock().getPositionsPerPage(boundary); + if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) { + Map> rectanglesPerLinePerPage = deepestFullyContainingNode.getTextBlock().getPositionsPerPage(textRange); Page firstPage = rectanglesPerLinePerPage.keySet() .stream() @@ -128,37 +127,37 @@ public class RedactionEntity implements MatchedRuleHolder { .orElseThrow(() -> new RuntimeException("No Positions found on any page!")); String id = IdBuilder.buildId(pages, rectanglesPerLinePerPage.values().stream().flatMap(Collection::stream).toList(), type, entityType.name()); - redactionPositionsPerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildRedactionPosition(firstPage, id, entry)).toList(); + positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildPositionOnPage(firstPage, id, entry)).toList(); } - return redactionPositionsPerPage; + return positionsOnPagePerPage; } - private static RedactionPosition buildRedactionPosition(Page firstPage, String id, Map.Entry> entry) { + private static PositionOnPage buildPositionOnPage(Page firstPage, String id, Map.Entry> entry) { if (entry.getKey().equals(firstPage)) { - return new RedactionPosition(id, entry.getKey(), entry.getValue()); + return new PositionOnPage(id, entry.getKey(), entry.getValue()); } else { - return new RedactionPosition(id + "-" + entry.getKey().getNumber(), entry.getKey(), entry.getValue()); + return new PositionOnPage(id + "-" + entry.getKey().getNumber(), entry.getKey(), entry.getValue()); } } - 
public boolean containedBy(RedactionEntity redactionEntity) { + public boolean containedBy(TextEntity textEntity) { - return this.boundary.containedBy(redactionEntity.getBoundary()); + return this.textRange.containedBy(textEntity.getTextRange()); } - public boolean contains(RedactionEntity redactionEntity) { + public boolean contains(TextEntity textEntity) { - return this.boundary.contains(redactionEntity.getBoundary()); + return this.textRange.contains(textEntity.getTextRange()); } - public boolean intersects(RedactionEntity redactionEntity) { + public boolean intersects(TextEntity textEntity) { - return this.boundary.intersects(redactionEntity.getBoundary()); + return this.textRange.intersects(textEntity.getTextRange()); } @@ -176,7 +175,7 @@ public class RedactionEntity implements MatchedRuleHolder { public boolean matchesAnnotationId(String manualRedactionId) { - return getRedactionPositionsPerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId)); + return getPositionsOnPagePerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId)); } @@ -187,7 +186,7 @@ public class RedactionEntity implements MatchedRuleHolder { sb.append("Entity[\""); sb.append(value); sb.append("\", "); - sb.append(boundary); + sb.append(textRange); sb.append(", pages["); pages.forEach(page -> { sb.append(page.getNumber()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Document.java index 60919d36..e1fd8481 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Document.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Document.java @@ -11,7 +11,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector; @@ -34,7 +34,7 @@ public class Document implements GenericSemanticNode { Integer numberOfPages; TextBlock textBlock; @Builder.Default - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Builder.Default static final SectionIdentifier sectionIdentifier = SectionIdentifier.document(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Footer.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Footer.java index 06bd3bff..f62e3d99 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Footer.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Footer.java @@ -5,7 +5,7 @@ import java.util.List; import java.util.Set; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import lombok.AccessLevel; @@ 
-34,7 +34,7 @@ public class Footer implements GenericSemanticNode { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Override diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Header.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Header.java index 28f82e08..41b1b549 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Header.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Header.java @@ -4,7 +4,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; @@ -34,7 +34,7 @@ public class Header implements GenericSemanticNode { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Override diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Headline.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Headline.java index 1b495d6e..b57e7873 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Headline.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Headline.java @@ -5,7 +5,7 @@ import java.util.List; import java.util.Set; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; @@ -33,7 +33,7 @@ public class Headline implements GenericSemanticNode { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Override diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Image.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Image.java index 39d610c1..b61cfc23 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Image.java @@ -9,10 +9,11 @@ import java.util.Map; import java.util.PriorityQueue; import java.util.Set; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite; import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule; -import 
com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRuleHolder; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector; @@ -29,7 +30,7 @@ import lombok.experimental.FieldDefaults; @AllArgsConstructor @NoArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) -public class Image implements GenericSemanticNode, MatchedRuleHolder { +public class Image implements GenericSemanticNode, Entity { List treeId; String id; @@ -38,12 +39,12 @@ public class Image implements GenericSemanticNode, MatchedRuleHolder { boolean transparent; Rectangle2D position; - boolean removed; - boolean ignored; - @Builder.Default PriorityQueue matchedRuleList = new PriorityQueue<>(); + @Builder.Default + ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite(); + @EqualsAndHashCode.Exclude Page page; @@ -52,7 +53,7 @@ public class Image implements GenericSemanticNode, MatchedRuleHolder { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Override diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Page.java index a0abb47f..4ea63a53 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Page.java @@ -1,11 +1,10 @@ package com.iqser.red.service.redaction.v1.server.document.graph.nodes; import java.util.HashSet; -import java.util.LinkedList; import 
java.util.List; import java.util.Set; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector; @@ -40,7 +39,7 @@ public class Page { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Builder.Default @EqualsAndHashCode.Exclude diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Paragraph.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Paragraph.java index bc0f26aa..4fb062c5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Paragraph.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Paragraph.java @@ -5,7 +5,7 @@ import java.util.List; import java.util.Set; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import lombok.AccessLevel; @@ -29,7 +29,7 @@ public class Paragraph implements GenericSemanticNode { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Override diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Section.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Section.java index 3e38c156..df76d026 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Section.java @@ -4,7 +4,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector; @@ -32,7 +32,7 @@ public class Section implements GenericSemanticNode { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Override diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/SemanticNode.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/SemanticNode.java index 77465bd8..71b24bb5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/SemanticNode.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/SemanticNode.java @@ -13,9 +13,9 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import 
com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations; @@ -43,11 +43,11 @@ public interface SemanticNode { /** * Any Node maintains its own Set of Entities. - * This Set contains all Entities whose boundary intersects the boundary of this node. + * This Set contains all Entities whose TextRange intersects the TextRange of this node. * * @return Set of all Entities associated with this Node */ - Set getEntities(); + Set getEntities(); /** @@ -72,16 +72,16 @@ public interface SemanticNode { /** - * Each AtomicTextBlock is assigned a page, so to get the pages for this boundary, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock. + * Each AtomicTextBlock is assigned a page, so to get the pages for this TextRange, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock. * * @return Set of PageNodes this node appears on. 
*/ - default Set getPages(Boundary boundary) { + default Set getPages(TextRange textRange) { - if (!getBoundary().contains(boundary)) { - throw new IllegalArgumentException(format("%s which was used to query for pages is not contained in the %s of this node!", boundary, getBoundary())); + if (!getTextRange().contains(textRange)) { + throw new IllegalArgumentException(format("%s which was used to query for pages is not contained in the %s of this node!", textRange, getTextRange())); } - return getTextBlock().getPages(boundary); + return getTextBlock().getPages(textRange); } @@ -215,7 +215,7 @@ public interface SemanticNode { */ default boolean hasEntitiesOfType(String type) { - return getEntities().stream().filter(RedactionEntity::isActive).anyMatch(redactionEntity -> redactionEntity.getType().equals(type)); + return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> redactionEntity.getType().equals(type)); } @@ -228,7 +228,7 @@ public interface SemanticNode { */ default boolean hasEntitiesOfAnyType(String... types) { - return getEntities().stream().filter(RedactionEntity::isActive).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.getType().equals(type))); + return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.getType().equals(type))); } @@ -242,8 +242,8 @@ public interface SemanticNode { default boolean hasEntitiesOfAllTypes(String... 
types) { return getEntities().stream() - .filter(RedactionEntity::isActive) - .map(RedactionEntity::getType) + .filter(TextEntity::active) + .map(TextEntity::getType) .collect(Collectors.toUnmodifiableSet()) .containsAll(Arrays.stream(types).toList()); } @@ -256,9 +256,9 @@ public interface SemanticNode { * @param type string representing the type of entities to return * @return List of RedactionEntities of any the type */ - default List getEntitiesOfType(String type) { + default List getEntitiesOfType(String type) { - return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.getType().equals(type)).toList(); + return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.getType().equals(type)).toList(); } @@ -269,9 +269,9 @@ public interface SemanticNode { * @param types A list of strings representing the types of entities to return * @return List of RedactionEntities of any provided type */ - default List getEntitiesOfType(List types) { + default List getEntitiesOfType(List types) { - return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList(); + return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList(); } @@ -282,9 +282,9 @@ public interface SemanticNode { * @param types A list of strings representing the types of entities to return * @return List of RedactionEntities that match any of the provided types */ - default List getEntitiesOfType(String... types) { + default List getEntitiesOfType(String... 
types) { - return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList(); + return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList(); } @@ -440,22 +440,22 @@ public interface SemanticNode { /** - * This function is used during insertion of EntityNodes into the graph, it checks if the boundary of the RedactionEntity intersects or even contains the RedactionEntity. + * This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity. * It sets the fields accordingly and recursively calls this function on all its children. * - * @param redactionEntity RedactionEntity, which is being inserted into the graph + * @param textEntity RedactionEntity, which is being inserted into the graph */ - default void addThisToEntityIfIntersects(RedactionEntity redactionEntity) { + default void addThisToEntityIfIntersects(TextEntity textEntity) { TextBlock textBlock = getTextBlock(); - if (textBlock.getBoundary().intersects(redactionEntity.getBoundary())) { - if (textBlock.containsBoundary(redactionEntity.getBoundary())) { - redactionEntity.setDeepestFullyContainingNode(this); + if (textBlock.getTextRange().intersects(textEntity.getTextRange())) { + if (textBlock.containsTextRange(textEntity.getTextRange())) { + textEntity.setDeepestFullyContainingNode(this); } - redactionEntity.addIntersectingNode(this); - streamChildren().filter(semanticNode -> semanticNode.getBoundary().intersects(redactionEntity.getBoundary())) - .forEach(node -> node.addThisToEntityIfIntersects(redactionEntity)); + textEntity.addIntersectingNode(this); + streamChildren().filter(semanticNode -> semanticNode.getTextRange().intersects(textEntity.getTextRange())) + .forEach(node -> node.addThisToEntityIfIntersects(textEntity)); } } 
@@ -505,13 +505,13 @@ public interface SemanticNode { /** - * The Boundary is the start and end string offsets in the reading order of the document. + * The TextRange is the start and end string offsets in the reading order of the document. * - * @return Boundary of this Node's TextBlock + * @return TextRange of this Node's TextBlock */ - default Boundary getBoundary() { + default TextRange getTextRange() { - return getTextBlock().getBoundary(); + return getTextBlock().getTextRange(); } @@ -522,7 +522,7 @@ public interface SemanticNode { */ default int length() { - return getBoundary().length(); + return getTextRange().length(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Table.java index 0366b1d2..6b9ac3de 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/Table.java @@ -11,7 +11,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector; @@ -38,7 +38,7 @@ public class Table implements SemanticNode { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); /** @@ -47,7 +47,7 @@ public class Table implements 
SemanticNode { * @param strings Strings to check whether a row contains them * @return Stream of all entities in this table, that appear in a row, which contains any of the provided strings */ - public Stream streamEntitiesWhereRowContainsStringsIgnoreCase(List strings) { + public Stream streamEntitiesWhereRowContainsStringsIgnoreCase(List strings) { return IntStream.range(0, numberOfRows) .boxed() @@ -79,7 +79,7 @@ public class Table implements SemanticNode { * @param value the string which the table cell should contain * @return a stream of all entities, which appear in a row where at least one cell has the provided header and the provided value. */ - public Stream streamEntitiesWhereRowHasHeaderAndValue(String header, String value) { + public Stream streamEntitiesWhereRowHasHeaderAndValue(String header, String value) { List vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream() @@ -94,7 +94,7 @@ public class Table implements SemanticNode { * @param values the strings which the table cell should contain * @return a stream of all entities, which appear in a row where at least one cell has the provided header and any provided value. */ - public Stream streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List values) { + public Stream streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List values) { List colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); return streamTableCells().filter(tableCellNode -> colsWithHeader.stream() @@ -109,12 +109,12 @@ public class Table implements SemanticNode { * @param types type strings to check whether a row contains an entity like them * @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types. 
*/ - public Stream streamEntitiesWhereRowContainsEntitiesOfType(List types) { + public Stream streamEntitiesWhereRowContainsEntitiesOfType(List types) { List rowsWithEntityOfType = getEntities().stream() - .filter(RedactionEntity::isActive) + .filter(TextEntity::active) .filter(redactionEntity -> types.stream().anyMatch(type -> type.equals(redactionEntity.getType()))) - .map(RedactionEntity::getIntersectingNodes) + .map(TextEntity::getIntersectingNodes) .filter(node -> node instanceof TableCell) .map(node -> (TableCell) node) .map(TableCell::getRow) @@ -131,13 +131,13 @@ public class Table implements SemanticNode { * @param types type strings to check whether a row contains an entity like them * @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types. */ - public Stream streamEntitiesWhereRowContainsNoEntitiesOfType(List types) { + public Stream streamEntitiesWhereRowContainsNoEntitiesOfType(List types) { return IntStream.range(0, numberOfRows) .boxed() .filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities) .flatMap(Collection::stream) - .filter(RedactionEntity::isActive) + .filter(TextEntity::active) .noneMatch(entity -> types.contains(entity.getType()))) .flatMap(this::streamRow) .map(TableCell::getEntities) @@ -290,12 +290,12 @@ public class Table implements SemanticNode { * Ignores Entity with ignored == true or removed == true. * * @param type the type of entities to search for - * @param redactionEntity the entity, which appears in the row to search + * @param textEntity the entity, which appears in the row to search * @return List of all entities of the provided type, which appear in the same row that the provided entity appears in. 
*/ - public List getEntitiesOfTypeInSameRow(String type, RedactionEntity redactionEntity) { + public List getEntitiesOfTypeInSameRow(String type, TextEntity textEntity) { - return redactionEntity.getIntersectingNodes() + return textEntity.getIntersectingNodes() .stream() .filter(node -> node instanceof TableCell) .map(node -> (TableCell) node) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/TableCell.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/TableCell.java index 7a959d35..4775e110 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/TableCell.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/nodes/TableCell.java @@ -7,7 +7,7 @@ import java.util.List; import java.util.Map; import java.util.Set; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector; @@ -41,7 +41,7 @@ public class TableCell implements GenericSemanticNode { @Builder.Default @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); @Override diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/AtomicTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/AtomicTextBlock.java 
index 83bd426b..4699ee08 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/AtomicTextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/AtomicTextBlock.java @@ -12,7 +12,7 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations; @@ -38,7 +38,7 @@ public class AtomicTextBlock implements TextBlock { Page page; //string coordinates - Boundary boundary; + TextRange textRange; String searchText; List lineBreaks; @@ -61,7 +61,7 @@ public class AtomicTextBlock implements TextBlock { return AtomicTextBlock.builder() .id(textBlockIdx) - .boundary(new Boundary(stringOffset, stringOffset)) + .textRange(new TextRange(stringOffset, stringOffset)) .searchText("") .lineBreaks(Collections.emptyList()) .page(page) @@ -82,7 +82,7 @@ public class AtomicTextBlock implements TextBlock { .id(atomicTextBlockData.getId()) .numberOnPage(atomicTextBlockData.getNumberOnPage()) .page(page) - .boundary(new Boundary(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd())) + .textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd())) .searchText(atomicTextBlockData.getSearchText()) .lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed().toList()) .stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed().toList()) @@ -98,20 +98,20 @@ public class AtomicTextBlock implements TextBlock { } - public Boundary 
getLineBoundary(int lineNumber) { + public TextRange getLineTextRange(int lineNumber) { if (lineNumber >= numberOfLines() || lineNumber < 0) { - return new Boundary(boundary.start(), boundary.start()); + return new TextRange(textRange.start(), textRange.start()); } if (numberOfLines() == 1) { - return boundary; + return textRange; } if (lineNumber == 0) { - return new Boundary(boundary.start(), lineBreaks.get(0) + boundary.start()); + return new TextRange(textRange.start(), lineBreaks.get(0) + textRange.start()); } else if (lineNumber == numberOfLines() - 1) { - return new Boundary(lineBreaks.get(lineBreaks.size() - 1) + boundary.start(), boundary.end()); + return new TextRange(lineBreaks.get(lineBreaks.size() - 1) + textRange.start(), textRange.end()); } - return new Boundary(lineBreaks.get(lineNumber - 1) + boundary.start(), lineBreaks.get(lineNumber) + boundary.start()); + return new TextRange(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start()); } @@ -126,9 +126,9 @@ public class AtomicTextBlock implements TextBlock { public int getNextLinebreak(int fromIndex) { return lineBreaks.stream()// - .filter(linebreak -> linebreak > fromIndex - boundary.start()) // + .filter(linebreak -> linebreak > fromIndex - textRange.start()) // .findFirst() // - .orElse(searchText.length()) + boundary.start(); + .orElse(searchText.length()) + textRange.start(); } @@ -136,43 +136,43 @@ public class AtomicTextBlock implements TextBlock { public int getPreviousLinebreak(int fromIndex) { return lineBreaks.stream()// - .filter(linebreak -> linebreak <= fromIndex - boundary.start())// + .filter(linebreak -> linebreak <= fromIndex - textRange.start())// .reduce((a, b) -> b)// - .orElse(0) + boundary.start(); + .orElse(0) + textRange.start(); } @Override public Rectangle2D getPosition(int stringIdx) { - return positions.get(stringIdxToPositionIdx.get(stringIdx - boundary.start())); + return positions.get(stringIdxToPositionIdx.get(stringIdx - 
textRange.start())); } @Override - public List getPositions(Boundary stringBoundary) { + public List getPositions(TextRange stringTextRange) { - if (!containsBoundary(stringBoundary)) { - throw new IndexOutOfBoundsException(format("%s is out of bounds for %s", stringBoundary, this.boundary)); + if (!containsTextRange(stringTextRange)) { + throw new IndexOutOfBoundsException(format("%s is out of bounds for %s", stringTextRange, this.textRange)); } - if (stringBoundary.length() == 0) { + if (stringTextRange.length() == 0) { return Collections.emptyList(); } - int startPositionIdx = stringIdxToPositionIdx.get(stringBoundary.start() - this.boundary.start()); + int startPositionIdx = stringIdxToPositionIdx.get(stringTextRange.start() - this.textRange.start()); - if (stringBoundary.end() == this.boundary.end()) { + if (stringTextRange.end() == this.textRange.end()) { return positions.subList(startPositionIdx, positions.size()); } - return positions.subList(startPositionIdx, stringIdxToPositionIdx.get(stringBoundary.end() - this.boundary.start())); + return positions.subList(startPositionIdx, stringIdxToPositionIdx.get(stringTextRange.end() - this.textRange.start())); } - public Map> getPositionsPerPage(Boundary stringBoundary) { + public Map> getPositionsPerPage(TextRange stringTextRange) { - List rectanglesPerLine = stringBoundary.split(getAllLineBreaksInBoundary(stringBoundary)) + List rectanglesPerLine = stringTextRange.split(getAllLineBreaksInBoundary(stringTextRange)) .stream() .map(this::getPositions) .map(RectangleTransformations::rectangleBBoxWithGaps) @@ -185,18 +185,18 @@ public class AtomicTextBlock implements TextBlock { @Override - public String subSequenceWithLineBreaks(Boundary boundary) { + public String subSequenceWithLineBreaks(TextRange textRange) { - if (boundary.length() == 0 || !getBoundary().contains(boundary)) { + if (textRange.length() == 0 || !getTextRange().contains(textRange)) { return ""; } - Set lbInBoundary = lineBreaks.stream().map(i -> i 
+ boundary.start()).filter(boundary::contains).collect(Collectors.toSet()); - if (boundary.end() == getBoundary().end()) { - lbInBoundary.add(getBoundary().end()); + Set lbInBoundary = lineBreaks.stream().map(i -> i + textRange.start()).filter(textRange::contains).collect(Collectors.toSet()); + if (textRange.end() == getTextRange().end()) { + lbInBoundary.add(getTextRange().end()); } StringBuilder sb = new StringBuilder(); - for (int i = boundary.start(); i < boundary.end(); i++) { + for (int i = textRange.start(); i < textRange.end(); i++) { char character = this.charAt(i); if (lbInBoundary.contains(i + 1)) { // always plus one, due to the linebreaks being an exclusive end index @@ -215,9 +215,9 @@ public class AtomicTextBlock implements TextBlock { } - private List getAllLineBreaksInBoundary(Boundary boundary) { + private List getAllLineBreaksInBoundary(TextRange textRange) { - return getLineBreaks().stream().map(linebreak -> linebreak + this.boundary.start()).filter(boundary::contains).toList(); + return getLineBreaks().stream().map(linebreak -> linebreak + this.textRange.start()).filter(textRange::contains).toList(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/ConcatenatedTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/ConcatenatedTextBlock.java index 392f968d..99089681 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/ConcatenatedTextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/ConcatenatedTextBlock.java @@ -10,7 +10,7 @@ import java.util.List; import java.util.Map; import java.util.stream.Stream; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; 
+import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; import lombok.AccessLevel; @@ -23,7 +23,7 @@ public class ConcatenatedTextBlock implements TextBlock { List atomicTextBlocks; String searchText; - Boundary boundary; + TextRange textRange; public static ConcatenatedTextBlock empty() { @@ -36,12 +36,12 @@ public class ConcatenatedTextBlock implements TextBlock { this.atomicTextBlocks = new LinkedList<>(); if (atomicTextBlocks.isEmpty()) { - boundary = new Boundary(-1, -1); + textRange = new TextRange(-1, -1); return; } var firstTextBlock = atomicTextBlocks.get(0); this.atomicTextBlocks.add(firstTextBlock); - boundary = new Boundary(firstTextBlock.getBoundary().start(), firstTextBlock.getBoundary().end()); + textRange = new TextRange(firstTextBlock.getTextRange().start(), firstTextBlock.getTextRange().end()); atomicTextBlocks.subList(1, atomicTextBlocks.size()).forEach(this::concat); } @@ -50,13 +50,13 @@ public class ConcatenatedTextBlock implements TextBlock { public ConcatenatedTextBlock concat(TextBlock textBlock) { if (this.atomicTextBlocks.isEmpty()) { - boundary.setStart(textBlock.getBoundary().start()); - boundary.setEnd(textBlock.getBoundary().end()); - } else if (boundary.end() != textBlock.getBoundary().start()) { - throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary())); + textRange.setStart(textBlock.getTextRange().start()); + textRange.setEnd(textBlock.getTextRange().end()); + } else if (textRange.end() != textBlock.getTextRange().start()) { + throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", textRange, textBlock.getTextRange())); } this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks()); - boundary.setEnd(textBlock.getBoundary().end()); + 
textRange.setEnd(textBlock.getTextRange().end()); this.searchText = null; return this; } @@ -64,13 +64,13 @@ public class ConcatenatedTextBlock implements TextBlock { private AtomicTextBlock getAtomicTextBlockByStringIndex(int stringIdx) { - return atomicTextBlocks.stream().filter(textBlock -> textBlock.getBoundary().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new); + return atomicTextBlocks.stream().filter(textBlock -> textBlock.getTextRange().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new); } - private List getAllAtomicTextBlocksPartiallyInStringBoundary(Boundary boundary) { + private List getAllAtomicTextBlocksPartiallyInStringBoundary(TextRange textRange) { - return atomicTextBlocks.stream().filter(tb -> tb.getBoundary().intersects(boundary)).toList(); + return atomicTextBlocks.stream().filter(tb -> tb.getTextRange().intersects(textRange)).toList(); } @@ -121,99 +121,99 @@ public class ConcatenatedTextBlock implements TextBlock { return getAtomicTextBlockByStringIndex(stringIdx).getPosition(stringIdx); } - public Boundary getLineBoundary(int lineNumber) { + public TextRange getLineTextRange(int lineNumber) { if (atomicTextBlocks.size() == 1) { - return atomicTextBlocks.get(0).getLineBoundary(lineNumber); + return atomicTextBlocks.get(0).getLineTextRange(lineNumber); } int lineNumberInCurrentBlock = lineNumber; for (AtomicTextBlock atomicTextBlock : atomicTextBlocks) { if (lineNumberInCurrentBlock < atomicTextBlock.numberOfLines()) { - return atomicTextBlock.getLineBoundary(lineNumberInCurrentBlock); + return atomicTextBlock.getLineTextRange(lineNumberInCurrentBlock); } lineNumberInCurrentBlock -= atomicTextBlock.numberOfLines(); } - return new Boundary(boundary.start(), boundary.start()); + return new TextRange(textRange.start(), textRange.start()); } @Override - public List getPositions(Boundary stringBoundary) { + public List getPositions(TextRange stringTextRange) { - List textBlocks = 
getAllAtomicTextBlocksPartiallyInStringBoundary(stringBoundary); + List textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange); if (textBlocks.isEmpty()) { return Collections.emptyList(); } if (textBlocks.size() == 1) { - return textBlocks.get(0).getPositions(stringBoundary); + return textBlocks.get(0).getPositions(stringTextRange); } AtomicTextBlock firstTextBlock = textBlocks.get(0); - List positions = new LinkedList<>(firstTextBlock.getPositions(new Boundary(stringBoundary.start(), firstTextBlock.getBoundary().end()))); + List positions = new LinkedList<>(firstTextBlock.getPositions(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end()))); for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) { positions.addAll(textBlock.getPositions()); } var lastTextBlock = textBlocks.get(textBlocks.size() - 1); - positions.addAll(lastTextBlock.getPositions(new Boundary(lastTextBlock.getBoundary().start(), stringBoundary.end()))); + positions.addAll(lastTextBlock.getPositions(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end()))); return positions; } @Override - public Map> getPositionsPerPage(Boundary stringBoundary) { + public Map> getPositionsPerPage(TextRange stringTextRange) { - List textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringBoundary); + List textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange); if (textBlocks.isEmpty()) { return new HashMap<>(); } if (textBlocks.size() == 1) { - return textBlocks.get(0).getPositionsPerPage(stringBoundary); + return textBlocks.get(0).getPositionsPerPage(stringTextRange); } AtomicTextBlock firstTextBlock = textBlocks.get(0); - Map> rectanglesPerLinePerPage = firstTextBlock.getPositionsPerPage(new Boundary(stringBoundary.start(), firstTextBlock.getBoundary().end())); + Map> rectanglesPerLinePerPage = firstTextBlock.getPositionsPerPage(new TextRange(stringTextRange.start(), 
firstTextBlock.getTextRange().end())); for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) { - rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, textBlock.getPositionsPerPage(textBlock.getBoundary())); + rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, textBlock.getPositionsPerPage(textBlock.getTextRange())); } AtomicTextBlock lastTextBlock = textBlocks.get(textBlocks.size() - 1); rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, - lastTextBlock.getPositionsPerPage(new Boundary(lastTextBlock.getBoundary().start(), stringBoundary.end()))); + lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end()))); return rectanglesPerLinePerPage; } @Override - public String subSequenceWithLineBreaks(Boundary boundary) { + public String subSequenceWithLineBreaks(TextRange textRange) { - if (boundary.length() == 0 || !getBoundary().contains(boundary)) { + if (textRange.length() == 0 || !getTextRange().contains(textRange)) { return ""; } - List textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(boundary); + List textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(textRange); if (textBlocks.size() == 1) { - return textBlocks.get(0).subSequenceWithLineBreaks(boundary); + return textBlocks.get(0).subSequenceWithLineBreaks(textRange); } StringBuilder sb = new StringBuilder(); AtomicTextBlock firstTextBlock = textBlocks.get(0); - sb.append(firstTextBlock.subSequenceWithLineBreaks(new Boundary(boundary.start(), firstTextBlock.getBoundary().end()))); + sb.append(firstTextBlock.subSequenceWithLineBreaks(new TextRange(textRange.start(), firstTextBlock.getTextRange().end()))); for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) { sb.append(textBlock.searchTextWithLineBreaks()); } var lastTextBlock = textBlocks.get(textBlocks.size() - 1); - 
sb.append(lastTextBlock.subSequenceWithLineBreaks(new Boundary(lastTextBlock.getBoundary().start(), boundary.end()))); + sb.append(lastTextBlock.subSequenceWithLineBreaks(new TextRange(lastTextBlock.getTextRange().start(), textRange.end()))); return sb.toString(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/TextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/TextBlock.java index e75760a8..df0d4ceb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/TextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/graph/textblock/TextBlock.java @@ -10,7 +10,7 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations; @@ -22,7 +22,7 @@ public interface TextBlock extends CharSequence { List getAtomicTextBlocks(); - Boundary getBoundary(); + TextRange getTextRange(); int getNextLinebreak(int fromIndex); @@ -31,7 +31,7 @@ public interface TextBlock extends CharSequence { int getPreviousLinebreak(int fromIndex); - Boundary getLineBoundary(int lineNumber); + TextRange getLineTextRange(int lineNumber); @@ -41,13 +41,13 @@ public interface TextBlock extends CharSequence { Rectangle2D getPosition(int stringIdx); - List getPositions(Boundary stringBoundary); + List getPositions(TextRange stringTextRange); - Map> getPositionsPerPage(Boundary stringBoundary); + Map> 
getPositionsPerPage(TextRange stringTextRange); - String subSequenceWithLineBreaks(Boundary boundary); + String subSequenceWithLineBreaks(TextRange textRange); int numberOfLines(); @@ -55,13 +55,13 @@ public interface TextBlock extends CharSequence { default CharSequence getLine(int lineNumber) { - return subSequence(getLineBoundary(lineNumber)); + return subSequence(getLineTextRange(lineNumber)); } default List getLinePositions(int lineNumber) { - return getPositions(getLineBoundary(lineNumber)); + return getPositions(getLineTextRange(lineNumber)); } @@ -72,13 +72,13 @@ public interface TextBlock extends CharSequence { default String searchTextWithLineBreaks() { - return subSequenceWithLineBreaks(getBoundary()); + return subSequenceWithLineBreaks(getTextRange()); } default int indexOf(String searchTerm) { - return indexOf(searchTerm, getBoundary().start()); + return indexOf(searchTerm, getTextRange().start()); } @@ -88,10 +88,10 @@ public interface TextBlock extends CharSequence { } - default Set getPages(Boundary boundary) { + default Set getPages(TextRange textRange) { return getAtomicTextBlocks().stream() - .filter(atomicTextBlock -> atomicTextBlock.getBoundary().intersects(boundary)) + .filter(atomicTextBlock -> atomicTextBlock.getTextRange().intersects(textRange)) .map(AtomicTextBlock::getPage) .collect(Collectors.toUnmodifiableSet()); } @@ -99,38 +99,38 @@ public interface TextBlock extends CharSequence { default int indexOf(String searchTerm, int startOffset) { - int start = getSearchText().indexOf(searchTerm, startOffset - getBoundary().start()); + int start = getSearchText().indexOf(searchTerm, startOffset - getTextRange().start()); if (start == -1) { return -1; } - return start + getBoundary().start(); + return start + getTextRange().start(); } default CharSequence getFirstLine() { - return subSequence(getBoundary().start(), getNextLinebreak(getBoundary().start())); + return subSequence(getTextRange().start(), getNextLinebreak(getTextRange().start())); } 
- default boolean containsBoundary(Boundary boundary) { + default boolean containsTextRange(TextRange textRange) { - if (boundary.end() < boundary.start()) { - throw new IllegalArgumentException(format("Invalid %s, StartIndex must be smaller than EndIndex", boundary)); + if (textRange.end() < textRange.start()) { + throw new IllegalArgumentException(format("Invalid %s, StartIndex must be smaller than EndIndex", textRange)); } - return getBoundary().contains(boundary); + return getTextRange().contains(textRange); } default boolean containsIndex(int stringIndex) { - return getBoundary().contains(stringIndex); + return getTextRange().contains(stringIndex); } - default CharSequence subSequence(Boundary boundary) { + default CharSequence subSequence(TextRange textRange) { - return subSequence(boundary.start(), boundary.end()); + return subSequence(textRange.start(), textRange.end()); } @@ -147,21 +147,21 @@ public interface TextBlock extends CharSequence { @Override default CharSequence subSequence(int start, int end) { - return getSearchText().substring(start - getBoundary().start(), end - getBoundary().start()); + return getSearchText().substring(start - getTextRange().start(), end - getTextRange().start()); } @Override default int length() { - return getBoundary().length(); + return getTextRange().length(); } @Override default char charAt(int index) { - return getSearchText().charAt(index - getBoundary().start()); + return getSearchText().charAt(index - getTextRange().start()); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityCreationService.java index 115f51ea..c048d2ab 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityCreationService.java @@ -18,12 +18,13 @@ import org.kie.api.runtime.KieSession; import com.google.common.base.Functions; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.ConsecutiveBoundaryCollector; import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; @@ -55,28 +56,28 @@ public class EntityCreationService { } - public Stream betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); - List 
stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); + List stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node); } - public Stream betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); - List stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); + List stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node); } - public Stream betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); - List stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); + List stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); startBoundaries.forEach(boundary -> { boundary.setStart(boundary.start() - start.length()); @@ -87,10 +88,10 @@ public class EntityCreationService { } - public Stream 
betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); - List stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); + List stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); startBoundaries.forEach(boundary -> { boundary.setStart(boundary.start() - start.length()); @@ -101,10 +102,10 @@ public class EntityCreationService { } - public Stream betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); - List stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); + List stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); stopBoundaries.forEach(boundary -> { boundary.setStart(boundary.start() + stop.length()); @@ -115,10 +116,10 @@ public class EntityCreationService { } - public Stream betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = 
RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); - List stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); + List stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); stopBoundaries.forEach(boundary -> { boundary.setStart(boundary.start() + stop.length()); @@ -129,10 +130,10 @@ public class EntityCreationService { } - public Stream betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); - List stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock()); + List stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock()); startBoundaries.forEach(boundary -> { boundary.setStart(boundary.start() - start.length()); @@ -147,10 +148,10 @@ public class EntityCreationService { } - public Stream betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { - List startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); - List stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); + List startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock()); + List 
stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock()); startBoundaries.forEach(boundary -> { boundary.setStart(boundary.start() - start.length()); @@ -165,32 +166,32 @@ public class EntityCreationService { } - public Stream betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); - List startBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStart, textBlock); - List stopBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStop, textBlock); + List startBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStart, textBlock); + List stopBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStop, textBlock); return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node); } - public Stream betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) { + public Stream betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); - List startBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStart, 0, textBlock); - List stopBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStop, 0, textBlock); + List startBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStart, 0, textBlock); + List stopBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStop, 0, textBlock); return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node); } - public Stream betweenBoundaries(List startBoundaries, List stopBoundaries, String type, EntityType entityType, SemanticNode node) { + public Stream betweenBoundaries(List 
startBoundaries, List stopBoundaries, String type, EntityType entityType, SemanticNode node) { if (startBoundaries.isEmpty() || stopBoundaries.isEmpty()) { return Stream.empty(); } - List entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries); + List entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries); return entityBoundaries.stream() .map(boundary -> boundary.trim(node.getTextBlock())) .filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary)) @@ -200,23 +201,23 @@ public class EntityCreationService { } - private static List findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(List startBoundaries, List stopBoundaries) { + private static List findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(List startBoundaries, List stopBoundaries) { - List entityBoundaries = new LinkedList<>(); - for (Boundary startBoundary : startBoundaries) { - Optional optionalStopBoundaryWithMinimalDistance = stopBoundaries.stream() - .filter(stopBoundary -> stopBoundary.start() > startBoundary.end()) - .min(Comparator.comparingInt(Boundary::start)); + List entityBoundaries = new LinkedList<>(); + for (TextRange startTextRange : startBoundaries) { + Optional optionalStopBoundaryWithMinimalDistance = stopBoundaries.stream() + .filter(stopBoundary -> stopBoundary.start() > startTextRange.end()) + .min(Comparator.comparingInt(TextRange::start)); if (optionalStopBoundaryWithMinimalDistance.isEmpty()) { break; } - entityBoundaries.add(new Boundary(startBoundary.end(), optionalStopBoundaryWithMinimalDistance.get().start())); + entityBoundaries.add(new TextRange(startTextRange.end(), optionalStopBoundaryWithMinimalDistance.get().start())); } return removeOuterOverlappingBoundaries(entityBoundaries); } - private static List removeOuterOverlappingBoundaries(List entityBoundaries) { + private static List 
removeOuterOverlappingBoundaries(List entityBoundaries) { /* In some cases we get boundaries, where one contains the other. This happens for Example when we have two start boundaries and one stop boundary after the two start boundaries. Then we get two boundaries where one is entirely contained in the other. So we want to remove the outer boundary. @@ -230,9 +231,9 @@ public class EntityCreationService { } - public Stream bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) { + public Stream bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) { - return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary()) + return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) .stream() .filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary)) .map(bounds -> byBoundary(bounds, type, entityType, node)) @@ -241,11 +242,11 @@ public class EntityCreationService { } - public Stream lineAfterStrings(List strings, String type, EntityType entityType, SemanticNode node) { + public Stream lineAfterStrings(List strings, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); SearchImplementation searchImplementation = new SearchImplementation(strings, false); - return searchImplementation.getBoundaries(textBlock, node.getBoundary()) + return searchImplementation.getBoundaries(textBlock, node.getTextRange()) .stream() .map(boundary -> toLineAfterBoundary(textBlock, boundary)) .filter(boundary -> isValidEntityBoundary(textBlock, boundary)) @@ -255,11 +256,11 @@ public class EntityCreationService { } - public Stream lineAfterStringsIgnoreCase(List strings, String type, EntityType entityType, SemanticNode node) { + public Stream lineAfterStringsIgnoreCase(List strings, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = 
node.getTextBlock(); SearchImplementation searchImplementation = new SearchImplementation(strings, true); - return searchImplementation.getBoundaries(textBlock, node.getBoundary()) + return searchImplementation.getBoundaries(textBlock, node.getTextRange()) .stream() .map(boundary -> toLineAfterBoundary(textBlock, boundary)) .filter(boundary -> isValidEntityBoundary(textBlock, boundary)) @@ -269,7 +270,7 @@ public class EntityCreationService { } - public Stream lineAfterString(String string, String type, EntityType entityType, SemanticNode node) { + public Stream lineAfterString(String string, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); return RedactionSearchUtility.findBoundariesByString(string, textBlock) @@ -282,7 +283,7 @@ public class EntityCreationService { } - public Stream lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) { + public Stream lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); return RedactionSearchUtility.findBoundariesByStringIgnoreCase(string, textBlock) @@ -295,7 +296,7 @@ public class EntityCreationService { } - public Stream lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) { + public Stream lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) { return tableNode.streamTableCells() .flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findBoundariesByString(string, tableCell.getTextBlock()), @@ -306,7 +307,7 @@ public class EntityCreationService { } - public Stream lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) { + public Stream lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) { return tableNode.streamTableCells() 
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findBoundariesByStringIgnoreCase(string, tableCell.getTextBlock()), @@ -327,7 +328,7 @@ public class EntityCreationService { * @param tableNode the table node * @return a stream of RedactionEntities */ - private Stream lineAfterBoundariesAcrossColumns(List boundaries, TableCell tableCell, String type, EntityType entityType, Table tableNode) { + private Stream lineAfterBoundariesAcrossColumns(List boundaries, TableCell tableCell, String type, EntityType entityType, Table tableNode) { return boundaries.stream() .map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary))) @@ -344,13 +345,13 @@ public class EntityCreationService { } - public Optional semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) { + public Optional semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) { var textBlock = semanticNode.getTextBlock(); int startIndex = Math.min(textBlock.indexOf(string), 0); - var boundary = new Boundary(startIndex, semanticNode.getBoundary().end()); + var boundary = new TextRange(startIndex, semanticNode.getTextRange().end()); if (boundary.length() > 0) { - boundary = new Boundary(boundary.start(), boundary.end() - 1); + boundary = new TextRange(boundary.start(), boundary.end() - 1); } if (!isValidEntityBoundary(textBlock, boundary)) { return Optional.empty(); @@ -359,31 +360,31 @@ public class EntityCreationService { } - public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) { + public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node); } - public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { + 
public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node); } - public Stream byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) { + public Stream byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegex(regexPattern, type, entityType, 0, node); } - public Stream byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { + public Stream byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegexIgnoreCase(regexPattern, type, entityType, 0, node); } - public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { + public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findBoundariesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock()) .stream() @@ -393,7 +394,7 @@ public class EntityCreationService { } - public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { + public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findBoundariesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock()) .stream() @@ -403,7 +404,7 @@ public class EntityCreationService { } - public Stream byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { + public Stream byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock()) .stream() @@ -413,7 
+414,7 @@ public class EntityCreationService { } - public Stream byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { + public Stream byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexPattern, group, node.getTextBlock()) .stream() @@ -423,7 +424,7 @@ public class EntityCreationService { } - public Stream byString(String keyword, String type, EntityType entityType, SemanticNode node) { + public Stream byString(String keyword, String type, EntityType entityType, SemanticNode node) { return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock()) .stream() @@ -433,7 +434,7 @@ public class EntityCreationService { } - public Stream byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) { + public Stream byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) { return RedactionSearchUtility.findBoundariesByStringIgnoreCase(keyword, node.getTextBlock()) .stream() @@ -443,16 +444,16 @@ public class EntityCreationService { } - public Stream bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) { + public Stream bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) { return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get); } - public Stream bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) { + public Stream bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) { return node.streamAllSubNodesOfType(NodeType.PARAGRAPH) - .map(SemanticNode::getBoundary) + .map(SemanticNode::getTextRange) .collect(new ConsecutiveBoundaryCollector()) .stream() 
.map(boundary -> byBoundary(boundary, type, entityType, node)) @@ -461,42 +462,42 @@ public class EntityCreationService { } - public Optional semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) { + public Optional semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) { if (!node.containsString(string)) { return Optional.empty(); } - Boundary boundary = new Boundary(node.getTextBlock().indexOf(string) + string.length(), node.getBoundary().end()); - return byBoundary(boundary, type, entityType, node); + TextRange textRange = new TextRange(node.getTextBlock().indexOf(string) + string.length(), node.getTextRange().end()); + return byBoundary(textRange, type, entityType, node); } - public Optional bySemanticNode(SemanticNode node, String type, EntityType entityType) { + public Optional bySemanticNode(SemanticNode node, String type, EntityType entityType) { - Boundary boundary = node.getTextBlock().getBoundary(); + TextRange textRange = node.getTextBlock().getTextRange(); - if (boundary.length() > 0) { - boundary = new Boundary(boundary.start(), boundary.end() - 1); + if (textRange.length() > 0) { + textRange = new TextRange(textRange.start(), textRange.end() - 1); } - if (!isValidEntityBoundary(node.getTextBlock(), boundary)) { + if (!isValidEntityBoundary(node.getTextBlock(), textRange)) { return Optional.empty(); } - return byBoundary(boundary, type, entityType, node); + return byBoundary(textRange, type, entityType, node); } - public Optional byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) { + public Optional byPrefixExpansionRegex(TextEntity entity, String regexPattern) { int expandedStart = RedactionSearchUtility.getExpandedStartByRegex(entity, regexPattern); - return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode()); + return byBoundary(new TextRange(expandedStart, 
entity.getTextRange().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode()); } - public Optional bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) { + public Optional bySuffixExpansionRegex(TextEntity entity, String regexPattern) { int expandedEnd = RedactionSearchUtility.getExpandedEndByRegex(entity, regexPattern); - expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock()); - return byBoundary(new Boundary(entity.getBoundary().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode()); + expandedEnd = truncateEndIfLineBreakIsBetween(entity.getTextRange().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock()); + return byBoundary(new TextRange(entity.getTextRange().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode()); } @@ -514,19 +515,19 @@ public class EntityCreationService { * If the document already contains an equal redaction entity, then the original Entity is returned. * Also inserts the Entity into the kieSession. * - * @param boundary The boundary of the redaction entity. + * @param textRange The boundary of the redaction entity. * @param type The type of the redaction entity. * @param entityType The entity type of the redaction entity. * @param node The semantic node to associate with the redaction entity. * @return An Optional containing the redaction entity, or the previous entity if the entity already exists. 
*/ - public Optional byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) { + public Optional byBoundary(TextRange textRange, String type, EntityType entityType, SemanticNode node) { - if (!node.getBoundary().contains(boundary)) { - throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", boundary, node.getBoundary(), node)); + if (!node.getTextRange().contains(textRange)) { + throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node)); } - Boundary trimmedBoundary = boundary.trim(node.getTextBlock()); - RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType); + TextRange trimmedTextRange = textRange.trim(node.getTextBlock()); + TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType); if (node.getEntities().contains(entity)) { return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngine(Engine.RULE)).findAny(); } @@ -537,16 +538,16 @@ public class EntityCreationService { } - public RedactionEntity forceByBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) { + public TextEntity forceByBoundary(TextRange textRange, String type, EntityType entityType, SemanticNode node) { - Boundary trimmedBoundary = boundary.trim(node.getTextBlock()); - RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType); + TextRange trimmedTextRange = textRange.trim(node.getTextBlock()); + TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType); addEntityToGraph(entity, node); return entity; } - public RedactionEntity mergeEntitiesOfSameType(List entitiesToMerge, String type, EntityType entityType, SemanticNode node) { + public TextEntity mergeEntitiesOfSameType(List entitiesToMerge, String type, EntityType entityType, SemanticNode node) { if 
(!allEntitiesIntersectAndHaveSameTypes(entitiesToMerge)) { throw new IllegalArgumentException("Provided entities can not be merged, since they do not intersect or are not the same type!" + entitiesToMerge); @@ -558,14 +559,17 @@ public class EntityCreationService { return entitiesToMerge.get(0); } - RedactionEntity mergedEntity = RedactionEntity.initialEntityNode(Boundary.merge(entitiesToMerge.stream().map(RedactionEntity::getBoundary).toList()), type, entityType); + TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream().map(TextEntity::getTextRange).toList()), type, entityType); mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet())); - entitiesToMerge.stream().map(RedactionEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule)); + entitiesToMerge.stream().map(TextEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule)); + entitiesToMerge.stream() + .map(TextEntity::getManualOverwrite) + .map(ManualChangeOverwrite::getManualChangeLog) + .flatMap(Collection::stream) + .forEach(manualChange -> mergedEntity.getManualOverwrite().addChange(manualChange)); - mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDictionaryEntry)); - mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDossierDictionaryEntry)); - mergedEntity.setIgnored(entitiesToMerge.stream().allMatch(RedactionEntity::isIgnored)); - mergedEntity.setRemoved(entitiesToMerge.stream().allMatch(RedactionEntity::isRemoved)); + mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDictionaryEntry)); + mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDossierDictionaryEntry)); addEntityToGraph(mergedEntity, node); 
insertToKieSession(mergedEntity); @@ -573,28 +577,27 @@ public class EntityCreationService { } - public Stream byEntities(List entities, String type, EntityType entityType, SemanticNode node) { + public Stream copyEntities(List entities, String type, EntityType entityType, SemanticNode node) { - return entities.stream().map(entity -> byEntity(type, entityType, node, entity)); + return entities.stream().map(entity -> copyEntity(entity, type, entityType, node)); } - private RedactionEntity byEntity(String type, EntityType entityType, SemanticNode node, RedactionEntity entity) { + public TextEntity copyEntity(TextEntity entity, String type, EntityType entityType, SemanticNode node) { - RedactionEntity newEntity = RedactionEntity.initialEntityNode(entity.getBoundary(), type, entityType); + TextEntity newEntity = TextEntity.initialEntityNode(entity.getTextRange(), type, entityType); newEntity.addEngines(entity.getEngines()); newEntity.addMatchedRules(entity.getMatchedRuleList()); + newEntity.getManualOverwrite().addChanges(entity.getManualOverwrite().getManualChangeLog()); newEntity.setDictionaryEntry(entity.isDictionaryEntry()); newEntity.setDossierDictionaryEntry(entity.isDossierDictionaryEntry()); - newEntity.setIgnored(entity.isIgnored()); - newEntity.setRemoved(entity.isRemoved()); addEntityToGraph(newEntity, node); insertToKieSession(newEntity); return newEntity; } - private void insertToKieSession(RedactionEntity mergedEntity) { + public void insertToKieSession(TextEntity mergedEntity) { if (kieSession != null) { kieSession.insert(mergedEntity); @@ -602,25 +605,25 @@ public class EntityCreationService { } - public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) { + public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) { - var entity = forceByBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode); + var entity = 
forceByBoundary(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode); entity.addEngine(Engine.NER); insertToKieSession(entity); return entity; } - public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) { + public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) { - var entity = forceByBoundary(nerEntity.boundary(), type, entityType, semanticNode); + var entity = forceByBoundary(nerEntity.textRange(), type, entityType, semanticNode); entity.addEngine(Engine.NER); insertToKieSession(entity); return entity; } - public Stream combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) { + public Stream combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) { return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) .map(boundary -> forceByBoundary(boundary, type, entityType, semanticNode)) @@ -629,28 +632,28 @@ public class EntityCreationService { } - public RedactionEntity byTableCellAsHighlight(TableCell tableCell, String type, EntityType entityType) { + public TextEntity byTableCellAsHighlight(TableCell tableCell, String type, EntityType entityType) { - RedactionEntity highlightEntity = RedactionEntity.initialEntityNode(new Boundary(tableCell.getBoundary().start(), tableCell.getBoundary().start()), type, entityType); + TextEntity highlightEntity = TextEntity.initialEntityNode(new TextRange(tableCell.getTextRange().start(), tableCell.getTextRange().start()), type, entityType); String positionId = IdBuilder.buildId(tableCell.getBBox().keySet(), tableCell.getBBox().values().stream().toList(), type, entityType.name()); - highlightEntity.setRedactionPositionsPerPage(tableCell.getBBox() + highlightEntity.setPositionsOnPagePerPage(tableCell.getBBox() 
.entrySet() .stream() - .map(entry -> new RedactionPosition(positionId, entry.getKey(), List.of(entry.getValue()))) + .map(entry -> new PositionOnPage(positionId, entry.getKey(), List.of(entry.getValue()))) .toList()); addEntityToGraph(highlightEntity, tableCell); return highlightEntity; } - public boolean isValidEntityBoundary(TextBlock textBlock, Boundary boundary) { + public boolean isValidEntityBoundary(TextBlock textBlock, TextRange textRange) { - return boundary.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, boundary); + return textRange.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, textRange); } - public void addEntityToGraph(RedactionEntity entity, SemanticNode node) { + public void addEntityToGraph(TextEntity entity, SemanticNode node) { DocumentTree documentTree = node.getDocumentTree(); try { @@ -667,10 +670,10 @@ public class EntityCreationService { } - private void addEntityToGraph(RedactionEntity entity, DocumentTree documentTree) { + private void addEntityToGraph(TextEntity entity, DocumentTree documentTree) { SemanticNode containingNode = documentTree.childNodes(Collections.emptyList()) - .filter(node -> node.getTextBlock().containsBoundary(entity.getBoundary())) + .filter(node -> node.getTextBlock().containsTextRange(entity.getTextRange())) .findFirst() .orElseThrow(() -> new NoSuchElementException("No containing Node found!")); @@ -684,30 +687,30 @@ public class EntityCreationService { } - private static void addToPages(RedactionEntity entity) { + private static void addToPages(TextEntity entity) { - Set pages = entity.getDeepestFullyContainingNode().getPages(entity.getBoundary()); + Set pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange()); entity.getPages().addAll(pages); pages.forEach(page -> page.getEntities().add(entity)); } - private static void addEntityToNodeEntitySets(RedactionEntity entity) { + private static void addEntityToNodeEntitySets(TextEntity entity) { 
entity.getIntersectingNodes().forEach(node -> node.getEntities().add(entity)); } - private static boolean allEntitiesIntersectAndHaveSameTypes(List entitiesToMerge) { + private static boolean allEntitiesIntersectAndHaveSameTypes(List entitiesToMerge) { if (entitiesToMerge.isEmpty()) { return true; } - RedactionEntity previousEntity = entitiesToMerge.get(0); - for (RedactionEntity redactionEntity : entitiesToMerge.subList(1, entitiesToMerge.size())) { - boolean typeMatches = redactionEntity.getType().equals(previousEntity.getType()); - boolean entityTypeMatches = redactionEntity.getEntityType().equals(previousEntity.getEntityType()); - boolean intersects = redactionEntity.intersects(previousEntity); + TextEntity previousEntity = entitiesToMerge.get(0); + for (TextEntity textEntity : entitiesToMerge.subList(1, entitiesToMerge.size())) { + boolean typeMatches = textEntity.getType().equals(previousEntity.getType()); + boolean entityTypeMatches = textEntity.getEntityType().equals(previousEntity.getEntityType()); + boolean intersects = textEntity.intersects(previousEntity); if (!typeMatches || !entityTypeMatches || !intersects) { return false; } @@ -716,9 +719,9 @@ public class EntityCreationService { } - private static Boundary toLineAfterBoundary(TextBlock textBlock, Boundary boundary) { + private static TextRange toLineAfterBoundary(TextBlock textBlock, TextRange textRange) { - return new Boundary(boundary.end(), textBlock.getNextLinebreak(boundary.end())).trim(textBlock); + return new TextRange(textRange.end(), textBlock.getNextLinebreak(textRange.end())).trim(textBlock); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityEnrichmentService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityEnrichmentService.java index 3578d3ca..926fff44 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityEnrichmentService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/EntityEnrichmentService.java @@ -6,7 +6,7 @@ import java.util.Objects; import org.springframework.stereotype.Service; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; @@ -19,17 +19,17 @@ public class EntityEnrichmentService { private final RedactionServiceSettings redactionServiceSettings; - public void enrichEntity(RedactionEntity entity, TextBlock textBlock) { + public void enrichEntity(TextEntity entity, TextBlock textBlock) { - entity.setValue(textBlock.subSequence(entity.getBoundary()).toString()); - entity.setTextAfter(findTextAfter(entity.getBoundary().end(), textBlock)); - entity.setTextBefore(findTextBefore(entity.getBoundary().start(), textBlock)); + entity.setValue(textBlock.subSequence(entity.getTextRange()).toString()); + entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock)); + entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock)); } private String findTextAfter(int index, TextBlock textBlock) { - int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getBoundary().end()); + int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().end()); String textAfter = textBlock.subSequence(index, endOffset).toString(); if (!textAfter.isBlank()) { List wordsAfter = splitToWordsAndRemoveEmptyWords(textAfter); @@ -44,7 +44,7 @@ public class 
EntityEnrichmentService { private String findTextBefore(int index, TextBlock textBlock) { - int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getBoundary().start()); + int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().start()); String textBefore = textBlock.subSequence(offsetBefore, index).toString(); if (!textBefore.isBlank()) { List wordsBefore = splitToWordsAndRemoveEmptyWords(textBefore); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualChangesApplicationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualChangesApplicationService.java new file mode 100644 index 00000000..ee701a12 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualChangesApplicationService.java @@ -0,0 +1,95 @@ +package com.iqser.red.service.redaction.v1.server.document.services; + +import java.awt.geom.Rectangle2D; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.NoSuchElementException; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image; +import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType; +import 
com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations; + +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class ManualChangesApplicationService { + + private final EntityCreationService entityCreationService; + + + public void recategorize(Entity entityToBeReCategorized, ManualImageRecategorization manualImageRecategorization) { + + if (entityToBeReCategorized instanceof Image image) { + image.setImageType(ImageType.fromString(manualImageRecategorization.getType())); + return; + } + // need to create a new entity and copy over all values, since type is part of the primary key for entities and should never be changed! + if (entityToBeReCategorized instanceof TextEntity textEntity) { + TextEntity recategorizedEntity = entityCreationService.copyEntity(textEntity, manualImageRecategorization.getType(), textEntity.getEntityType(), textEntity.getDeepestFullyContainingNode()); + recategorizedEntity.setPositionsOnPagePerPage(textEntity.getPositionsOnPagePerPage()); + recategorizedEntity.getManualOverwrite().addChange(manualImageRecategorization); + textEntity.removeFromGraph(); + } + } + + + public void resizeEntityAndReinsert(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) { + + PositionOnPage positionOnPageToBeResized = entityToBeResized.getPositionsOnPagePerPage() + .stream() + .filter(redactionPosition -> redactionPosition.getId().equals(manualResizeRedaction.getAnnotationId())) + .findFirst() + .orElseThrow(() -> new NoSuchElementException("No redaction position with matching annotation id found!")); + + positionOnPageToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions().stream().map(ManualChangesApplicationService::toRectangle2D).toList()); + + int newStartOffset; + if (manualResizeRedaction.getValue().length() > entityToBeResized.getValue().length()) { + newStartOffset = 
entityToBeResized.getTextRange().start() - manualResizeRedaction.getValue().indexOf(entityToBeResized.getValue()); + } else { + newStartOffset = entityToBeResized.getTextRange().start() + entityToBeResized.getValue().indexOf(manualResizeRedaction.getValue()); + } + + // need to reinsert the entity, due to the boundary having changed. + removeAndUpdateAndReInsertEntity(entityToBeResized, manualResizeRedaction, newStartOffset); + entityToBeResized.getManualOverwrite().addChange(manualResizeRedaction); + } + + + private void removeAndUpdateAndReInsertEntity(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction, int newStartOffset) { + + SemanticNode nodeToInsertInto = entityToBeResized.getDeepestFullyContainingNode().getDocumentTree().getRoot().getNode(); + entityToBeResized.getIntersectingNodes().forEach(node -> node.getEntities().remove(this)); + entityToBeResized.getPages().forEach(page -> page.getEntities().remove(this)); + entityToBeResized.setIntersectingNodes(new LinkedList<>()); + entityToBeResized.setDeepestFullyContainingNode(null); + entityToBeResized.setPages(new HashSet<>()); + entityToBeResized.getTextRange().setStart(newStartOffset); + entityToBeResized.getTextRange().setEnd(newStartOffset + manualResizeRedaction.getValue().length()); + entityCreationService.addEntityToGraph(entityToBeResized, nodeToInsertInto); + } + + + public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) { + + if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) { + return; + } + var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions().stream().map(ManualChangesApplicationService::toRectangle2D).toList()); + image.setPosition(bBox); + image.getManualOverwrite().addChange(manualResizeRedaction); + } + + + private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rect) { + + return new 
Rectangle2D.Double(rect.getTopLeftX() - rect.getWidth(), rect.getTopLeftY() - rect.getHeight(), rect.getWidth(), rect.getHeight()); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualRedactionApplicationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualRedactionApplicationService.java deleted file mode 100644 index e1d4ea29..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/services/ManualRedactionApplicationService.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.document.services; - -import java.awt.geom.Rectangle2D; -import java.util.NoSuchElementException; - -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image; -import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations; - -import lombok.RequiredArgsConstructor; - -@RequiredArgsConstructor -public class ManualRedactionApplicationService { - - private final EntityCreationService entityCreationService; - - - public void resizeEntityAndReinsert(RedactionEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) { - - RedactionPosition redactionPositionToBeResized = entityToBeResized.getRedactionPositionsPerPage() - .stream() - .filter(redactionPosition -> redactionPosition.getId().equals(manualResizeRedaction.getAnnotationId())) - .findFirst() - .orElseThrow(() -> new 
NoSuchElementException("No redaction position with matching annotation id found!")); - - redactionPositionToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions().stream().map(ManualRedactionApplicationService::toRectangle2D).toList()); - - int newStartOffset; - if (manualResizeRedaction.getValue().length() > entityToBeResized.getValue().length()) { - newStartOffset = entityToBeResized.getBoundary().start() - manualResizeRedaction.getValue().indexOf(entityToBeResized.getValue()); - } else { - newStartOffset = entityToBeResized.getBoundary().start() + entityToBeResized.getValue().indexOf(manualResizeRedaction.getValue()); - } - - SemanticNode nodeToInsertInto = entityToBeResized.getDeepestFullyContainingNode().getDocumentTree().getRoot().getNode(); - entityToBeResized.removeFromGraph(); - entityToBeResized.setResized(true); - entityToBeResized.setRemoved(false); - entityToBeResized.setIgnored(false); - entityToBeResized.getBoundary().setStart(newStartOffset); - entityToBeResized.getBoundary().setEnd(newStartOffset + manualResizeRedaction.getValue().length()); - entityCreationService.addEntityToGraph(entityToBeResized, nodeToInsertInto); - } - - - public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) { - - if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) { - return; - } - var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions().stream().map(ManualRedactionApplicationService::toRectangle2D).toList()); - image.setPosition(bBox); - } - - - private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rect) { - - return new Rectangle2D.Double(rect.getTopLeftX() - rect.getWidth(), rect.getTopLeftY() - rect.getHeight(), rect.getWidth(), rect.getHeight()); - } - -} diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RectangleTransformations.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RectangleTransformations.java index 28d036ab..586e0af4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RectangleTransformations.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RectangleTransformations.java @@ -12,8 +12,6 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collector; - -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock; @@ -22,7 +20,6 @@ import lombok.NoArgsConstructor; public class RectangleTransformations { - public static Rectangle2D atomicTextBlockBBox(List atomicTextBlocks) { return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector()); @@ -44,15 +41,6 @@ public class RectangleTransformations { } - public static Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) { - - return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())), - (float) rectangle2D.getWidth(), - -(float) rectangle2D.getHeight(), - pageNumber); - } - - public static Rectangle2D rectangle2DBBox(List rectangle2DList) { return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector()); diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RedactionSearchUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RedactionSearchUtility.java index a87f37e6..a43b5d43 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RedactionSearchUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/document/utils/RedactionSearchUtility.java @@ -9,8 +9,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.IntStream; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; @@ -51,60 +51,60 @@ public class RedactionSearchUtility { } - public static Boundary findFirstBoundary(String regexPattern, CharSequence searchText) { + public static TextRange findFirstBoundary(String regexPattern, CharSequence searchText) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, false); Matcher matcher = pattern.matcher(searchText); if (matcher.find()) { - return new Boundary(matcher.start(), matcher.end()); + return new TextRange(matcher.start(), matcher.end()); } throw new IllegalArgumentException(format("Charsequence %s does not contain any matches for pattern %s", searchText, regexPattern)); } - public static int getExpandedEndByRegex(RedactionEntity entity, String regexPattern) { + public static int 
getExpandedEndByRegex(TextEntity entity, String regexPattern) { int expandedEnd; if (anyMatch(entity.getTextAfter(), regexPattern)) { - Boundary postfixBoundary = findFirstBoundary(regexPattern, entity.getTextAfter()); - expandedEnd = postfixBoundary.end() + entity.getBoundary().end(); + TextRange postfixTextRange = findFirstBoundary(regexPattern, entity.getTextAfter()); + expandedEnd = postfixTextRange.end() + entity.getTextRange().end(); } else { - expandedEnd = entity.getBoundary().end(); + expandedEnd = entity.getTextRange().end(); } return expandedEnd; } - public static int getExpandedStartByRegex(RedactionEntity entity, String regexPattern) { + public static int getExpandedStartByRegex(TextEntity entity, String regexPattern) { int expandedStart; if (anyMatch(entity.getTextBefore(), regexPattern)) { - Boundary prefixBoundary = findFirstBoundary(regexPattern, entity.getTextBefore()); - expandedStart = prefixBoundary.start() + entity.getBoundary().start() - entity.getTextBefore().length(); + TextRange prefixTextRange = findFirstBoundary(regexPattern, entity.getTextBefore()); + expandedStart = prefixTextRange.start() + entity.getTextRange().start() - entity.getTextBefore().length(); } else { - expandedStart = entity.getBoundary().start(); + expandedStart = entity.getTextRange().start(); } return expandedStart; } - public static Boundary findBoundaryOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) { + public static TextRange findBoundaryOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) { - List lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineBoundary).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList(); + List lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineTextRange).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList(); if (lineBoundaries.isEmpty()) { - return new 
Boundary(textBlock.getBoundary().start(), textBlock.getBoundary().start()); + return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start()); } - return Boundary.merge(lineBoundaries); + return TextRange.merge(lineBoundaries); } - private static boolean isWithinYRange(double maxY, double minY, TextBlock textBlock, Boundary lineBoundary) { + private static boolean isWithinYRange(double maxY, double minY, TextBlock textBlock, TextRange lineTextRange) { - Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineBoundary)); + Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange)); return lineBBox.getMinY() < maxY && minY < lineBBox.getMaxY(); } - public static List findBoundariesByRegex(String regexPattern, TextBlock textBlock) { + public static List findBoundariesByRegex(String regexPattern, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, false); return getBoundariesByPattern(textBlock, 0, pattern); @@ -112,68 +112,68 @@ public class RedactionSearchUtility { } - public static List findBoundariesByRegex(String regexPattern, int group, TextBlock textBlock) { + public static List findBoundariesByRegex(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, false); return getBoundariesByPattern(textBlock, group, pattern); } - public static List findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) { + public static List findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false); return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern); } - public static List findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) { + public static List 
findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true); return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern); } - public static List findBoundariesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) { + public static List findBoundariesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, true); return getBoundariesByPattern(textBlock, group, pattern); } - private static List getBoundariesByPattern(TextBlock textBlock, int group, Pattern pattern) { + private static List getBoundariesByPattern(TextBlock textBlock, int group, Pattern pattern) { - Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getBoundary())); - List boundaries = new LinkedList<>(); + Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getTextRange())); + List boundaries = new LinkedList<>(); while (matcher.find()) { - boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start())); + boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start())); } return boundaries; } - private static List getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) { + private static List getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) { String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks(); Matcher matcher = pattern.matcher(searchTextWithLineBreaks); - List boundaries = new LinkedList<>(); + List boundaries = new LinkedList<>(); while (matcher.find()) { - boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start())); + boundaries.add(new 
TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start())); } return boundaries; } - public static List findBoundariesByString(String searchString, TextBlock textBlock) { + public static List findBoundariesByString(String searchString, TextBlock textBlock) { - List boundaries = new LinkedList<>(); + List boundaries = new LinkedList<>(); for (int index = textBlock.indexOf(searchString); index >= 0; index = textBlock.indexOf(searchString, index + 1)) { - boundaries.add(new Boundary(index, index + searchString.length())); + boundaries.add(new TextRange(index, index + searchString.length())); } return boundaries; } - public static List findBoundariesByStringIgnoreCase(String searchString, TextBlock textBlock) { + public static List findBoundariesByStringIgnoreCase(String searchString, TextBlock textBlock) { Pattern pattern = Pattern.compile(Pattern.quote(searchString), Pattern.CASE_INSENSITIVE); return getBoundariesByPattern(textBlock, 0, pattern); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java index 7c6e9a32..e4fdf7c7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/CustomEntityCreationAdapter.java @@ -20,15 +20,15 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; -import 
com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; +import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity; import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation; @@ -49,85 +49,85 @@ public class CustomEntityCreationAdapter { } - public List toRedactionEntity(RedactionLog redactionLog, SemanticNode node) { + public List toRedactionEntity(RedactionLog redactionLog, SemanticNode node) { - List entityIdentifiers = redactionLog.getRedactionLogEntry().stream().map(EntityIdentifier::fromRedactionLogEntry).toList(); - return toRedactionEntity(entityIdentifiers, node); + List manualEntities = redactionLog.getRedactionLogEntry().stream().map(ManualEntity::fromRedactionLogEntry).toList(); + return toRedactionEntity(manualEntities, node); } - public List createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set manualRedactionEntries, SemanticNode node) { + public List 
createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set manualRedactionEntries, SemanticNode node) { - List entityIdentifiers = manualRedactionEntries.stream() + List manualEntities = manualRedactionEntries.stream() .filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary())) - .map(EntityIdentifier::fromManualRedactionEntry) + .map(ManualEntity::fromManualRedactionEntry) .toList(); - return toRedactionEntity(entityIdentifiers, node); + return toRedactionEntity(manualEntities, node); } - private List toRedactionEntity(List entityIdentifiers, SemanticNode node) { + private List toRedactionEntity(List manualEntities, SemanticNode node) { - Set pageNumbers = entityIdentifiers.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet()); - Set entryValues = entityIdentifiers.stream().map(EntityIdentifier::getValue).map(String::toLowerCase).collect(Collectors.toSet()); + Set pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet()); + Set entryValues = manualEntities.stream().map(ManualEntity::getValue).map(String::toLowerCase).collect(Collectors.toSet()); - Map> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues); + Map> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues); - List notFoundEntityIdentifiers = new LinkedList<>(); - for (EntityIdentifier entityIdentifier : entityIdentifiers) { - Optional optionalRedactionEntity = findClosestEntityAndReturnEmptyIfNotFound(entityIdentifier, tempEntitiesByValue); + List notFoundManualEntities = new LinkedList<>(); + for (ManualEntity manualEntity : manualEntities) { + Optional optionalRedactionEntity = findClosestEntityAndReturnEmptyIfNotFound(manualEntity, tempEntitiesByValue); if (optionalRedactionEntity.isEmpty()) { 
- notFoundEntityIdentifiers.add(entityIdentifier); + notFoundManualEntities.add(manualEntity); continue; } - createCorrectEntity(entityIdentifier, node, optionalRedactionEntity.get().getBoundary()); + createCorrectEntity(manualEntity, node, optionalRedactionEntity.get().getTextRange()); } - tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph); - return notFoundEntityIdentifiers; + tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph); + return notFoundManualEntities; } /** * Deletes the temp Entity and creates a RedactionEntity with correct values, based on the given parameters. * - * @param entityIdentifier The entity identifier for the RedactionEntity. + * @param manualEntity The entity identifier for the RedactionEntity. * @param node The SemanticNode associated with the RedactionEntity. - * @param closestBoundary The closest Boundary to the RedactionEntity. + * @param closestTextRange The closest Boundary to the RedactionEntity. 
*/ - private void createCorrectEntity(EntityIdentifier entityIdentifier, SemanticNode node, Boundary closestBoundary) { + private void createCorrectEntity(ManualEntity manualEntity, SemanticNode node, TextRange closestTextRange) { - RedactionEntity correctEntity = entityCreationService.forceByBoundary(closestBoundary, entityIdentifier.getType(), entityIdentifier.getEntityType(), node); + TextEntity correctEntity = entityCreationService.forceByBoundary(closestTextRange, manualEntity.getType(), manualEntity.getEntityType(), node); - if (entityIdentifier.isApplied()) { - correctEntity.force(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason(), entityIdentifier.getLegalBasis()); + if (manualEntity.isApplied()) { + correctEntity.force(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis()); } else { - correctEntity.skip(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason()); + correctEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason()); } - correctEntity.setDictionaryEntry(entityIdentifier.isDictionaryEntry()); - correctEntity.setDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry()); + correctEntity.setDictionaryEntry(manualEntity.isDictionaryEntry()); + correctEntity.setDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry()); + correctEntity.getManualOverwrite().addChanges(manualEntity.getManualOverwrite().getManualChangeLog()); - // TODO: refactor this away! This is only needed so the persistence service can apply the correct comment and ManualChanges. 
- // It would be better, if the redaction-service returns a map of annotationId changes and the persistence service then migrates the annotationIds of Comments and ManualRedactions - List redactionPositionsWithIdOfManualRedaction = new ArrayList<>(correctEntity.getRedactionPositionsPerPage().size()); - for (RedactionPosition redactionPosition : correctEntity.getRedactionPositionsPerPage()) { - redactionPositionsWithIdOfManualRedaction.add(new RedactionPosition(entityIdentifier.getId(), redactionPosition.getPage(), redactionPosition.getRectanglePerLine())); + // AnnotationIds must match the IDs in the add requests, or comments break. Maybe think about migrating IDs on the fly! + List redactionPositionsWithIdOfManualOnPage = new ArrayList<>(correctEntity.getPositionsOnPagePerPage().size()); + for (PositionOnPage positionOnPage : correctEntity.getPositionsOnPagePerPage()) { + redactionPositionsWithIdOfManualOnPage.add(new PositionOnPage(manualEntity.getId(), positionOnPage.getPage(), positionOnPage.getRectanglePerLine())); } - correctEntity.setRedactionPositionsPerPage(redactionPositionsWithIdOfManualRedaction); + correctEntity.setPositionsOnPagePerPage(redactionPositionsWithIdOfManualOnPage); } - private Optional findClosestEntityAndReturnEmptyIfNotFound(EntityIdentifier identifier, Map> entitiesWithSameValue) { + private Optional findClosestEntityAndReturnEmptyIfNotFound(ManualEntity identifier, Map> entitiesWithSameValue) { - List possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT)); + List possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT)); if (entityIdentifierValueNotFound(possibleEntities)) { log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue()); return Optional.empty(); } - Optional optionalClosestEntity = possibleEntities.stream() + Optional optionalClosestEntity = possibleEntities.stream() 
.filter(entity -> pagesMatch(entity, identifier.getEntityPosition())) .min(Comparator.comparingDouble(entity -> calculateMinDistance(identifier.getEntityPosition(), entity))); @@ -136,14 +136,14 @@ public class CustomEntityCreationAdapter { return Optional.empty(); } - RedactionEntity closestEntity = optionalClosestEntity.get(); + TextEntity closestEntity = optionalClosestEntity.get(); double distance = calculateMinDistance(identifier.getEntityPosition(), closestEntity); if (distance > MATCH_THRESHOLD) { log.warn(format("Distance to closest found entity is %.2f and therefore higher than the threshold of %.2f for \n%s \n%s", distance, MATCH_THRESHOLD, identifier.getEntityPosition(), - closestEntity.getRedactionPositionsPerPage())); + closestEntity.getPositionsOnPagePerPage())); return Optional.empty(); } @@ -151,13 +151,13 @@ public class CustomEntityCreationAdapter { } - private static boolean entityIdentifierValueNotFound(List possibleEntities) { + private static boolean entityIdentifierValueNotFound(List possibleEntities) { return possibleEntities == null || possibleEntities.isEmpty(); } - private Map> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, Set pageNumbers, Set entryValues) { + private Map> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, Set pageNumbers, Set entryValues) { if (!pageNumbers.stream().allMatch(node::onPage)) { throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s", @@ -167,28 +167,28 @@ public class CustomEntityCreationAdapter { } SearchImplementation searchImplementation = new SearchImplementation(entryValues, true); - return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary()) + return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) .stream() .map(boundary -> entityCreationService.forceByBoundary(boundary, "temp", EntityType.ENTITY, node)) .collect(groupingBy(entity -> 
entity.getValue().toLowerCase(Locale.ROOT))); } - private static boolean allValuesFound(Map> entitiesByValue, Set entryValues) { + private static boolean allValuesFound(Map> entitiesByValue, Set entryValues) { return entitiesByValue.keySet().equals(entryValues); } - private static boolean pagesMatch(RedactionEntity entity, List originalPositions) { + private static boolean pagesMatch(TextEntity entity, List originalPositions) { - Set entityPageNumbers = entity.getRedactionPositionsPerPage().stream().map(RedactionPosition::getPage).map(Page::getNumber).collect(Collectors.toSet()); + Set entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet()); Set originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet()); return entityPageNumbers.containsAll(originalPageNumbers); } - private double calculateMinDistance(List originalPositions, RedactionEntity entity) { + private double calculateMinDistance(List originalPositions, TextEntity entity) { if (originalPositions.size() != countRectangles(entity)) { return Double.MAX_VALUE; @@ -199,18 +199,18 @@ public class CustomEntityCreationAdapter { } - private static long countRectangles(RedactionEntity entity) { + private static long countRectangles(TextEntity entity) { - return entity.getRedactionPositionsPerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum(); + return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum(); } - private double calculateMinDistancePerRectangle(RedactionEntity entity, int pageNumber, Rectangle2D originalRectangle) { + private double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) { - return entity.getRedactionPositionsPerPage() + return entity.getPositionsOnPagePerPage() .stream() 
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == pageNumber) - .map(RedactionPosition::getRectanglePerLine) + .map(PositionOnPage::getRectanglePerLine) .flatMap(Collection::stream) .mapToDouble(rectangle -> calculateDistance(rectangle, originalRectangle)) .min() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntities.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntities.java index 7a87ec76..a8a3b4d2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntities.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntities.java @@ -4,7 +4,7 @@ import java.util.LinkedList; import java.util.List; import java.util.stream.Stream; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import lombok.AccessLevel; import lombok.AllArgsConstructor; @@ -37,7 +37,7 @@ public class NerEntities { } - public record NerEntity(String value, Boundary boundary, String type) { + public record NerEntity(String value, TextRange textRange, String type) { } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapter.java index 70cd2315..fd6139e6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapter.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapter.java @@ -9,7 +9,7 @@ import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; @@ -44,7 +44,7 @@ public class NerEntitiesAdapter { return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSections(document), nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(), - new Boundary(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()), + new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()), nerEntityModel.getType())).toList()); } @@ -63,17 +63,17 @@ public class NerEntitiesAdapter { * @param allowDuplicates allow combining multiple parts of same type * @return A Stream of the combined boundaries */ - public Stream combineNerEntities(NerEntities nerEntities, - Set essentialTypes, - Set typesToCombine, - int maxDistanceBetweenParts, - int minPartsToCombine, - boolean allowDuplicates) { + public Stream combineNerEntities(NerEntities nerEntities, + Set essentialTypes, + Set typesToCombine, + int maxDistanceBetweenParts, + int minPartsToCombine, + boolean allowDuplicates) { List sortedEntities = nerEntities.getNerEntityList() .stream() .filter(entity -> typesToCombine.contains(entity.type())) - .sorted(Comparator.comparingInt(entity -> entity.boundary().start())) + .sorted(Comparator.comparingInt(entity -> 
entity.textRange().start())) .toList(); if (sortedEntities.isEmpty()) { @@ -86,20 +86,20 @@ public class NerEntitiesAdapter { for (NerEntities.NerEntity startEntity : startEntitiesOfEssentialType) { List currentCluster = new LinkedList<>(); entityClusters.add(currentCluster); - int lastEndOffset = startEntity.boundary().end(); + int lastEndOffset = startEntity.textRange().end(); for (NerEntities.NerEntity entity : sortedEntities) { - if (entity.boundary().start() < lastEndOffset) { + if (entity.textRange().start() < lastEndOffset) { continue; } if (distanceIsLargerThanMaxDistance(lastEndOffset, entity, maxDistanceBetweenParts) || isDuplicate(currentCluster, entity, allowDuplicates)) { currentCluster = new LinkedList<>(); entityClusters.add(currentCluster); currentCluster.add(entity); - lastEndOffset = entity.boundary().end(); + lastEndOffset = entity.textRange().end(); } else { currentCluster.add(entity); - lastEndOffset = entity.boundary().end(); + lastEndOffset = entity.textRange().end(); } } } @@ -120,7 +120,7 @@ public class NerEntitiesAdapter { * * @return A Stream of the combined entities of type "CBI_address" */ - public Stream combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) { + public Stream combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) { return combineNerEntities(entityRecognitionEntities, CBI_ADDRESS_ESSENTIAL_TYPES, @@ -139,13 +139,13 @@ public class NerEntitiesAdapter { private static boolean distanceIsLargerThanMaxDistance(int lastEndOffset, NerEntities.NerEntity entity, int maxDistance) { - return (entity.boundary().start() - lastEndOffset) > maxDistance; + return (entity.textRange().start() - lastEndOffset) > maxDistance; } - private static Boundary toContainingBoundary(List nerEntities) { + private static TextRange toContainingBoundary(List nerEntities) { - return Boundary.merge(nerEntities.stream().map(NerEntities.NerEntity::boundary).toList()); + return 
TextRange.merge(nerEntities.stream().map(NerEntities.NerEntity::textRange).toList()); } @@ -162,7 +162,7 @@ public class NerEntitiesAdapter { private static List getStringStartOffsetsForMainSections(Document document) { - return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getBoundary).map(Boundary::start).toList(); + return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getTextRange).map(TextRange::start).toList(); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java deleted file mode 100644 index 4c6638c3..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityIdentifier.java +++ /dev/null @@ -1,73 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.redaction.model; - -import java.util.List; - -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; - -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.experimental.FieldDefaults; - -@Getter -@AllArgsConstructor -@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) -public class EntityIdentifier { - - // must be used for comments to work correctly - String id; - String value; - List entityPosition; - String ruleIdentifier; - String reason; - String legalBasis; - String type; - String section; - EntityType entityType; - boolean applied; - boolean isDictionaryEntry; - boolean isDossierDictionaryEntry; - boolean 
rectangle; - - - public static EntityIdentifier fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) { - - String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0"; - List rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); - return new EntityIdentifier(redactionLogEntry.getId(), - redactionLogEntry.getValue(), - rectangleWithPages, - ruleIdentifier, - redactionLogEntry.getReason(), - redactionLogEntry.getLegalBasis(), - redactionLogEntry.getType(), - redactionLogEntry.getSection(), - redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY, - redactionLogEntry.isRedacted(), - redactionLogEntry.isDictionaryEntry(), - redactionLogEntry.isDossierDictionaryEntry(), - redactionLogEntry.isRectangle()); - } - - - public static EntityIdentifier fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) { - - List rectangleWithPages = manualRedactionEntry.getPositions().stream().map(RectangleWithPage::fromAnnotationRectangle).toList(); - return new EntityIdentifier(manualRedactionEntry.getAnnotationId(), - manualRedactionEntry.getValue(), - rectangleWithPages, - "MAN.0.0", - manualRedactionEntry.getReason(), - manualRedactionEntry.getLegalBasis(), - manualRedactionEntry.getType(), - manualRedactionEntry.getSection(), - EntityType.ENTITY, - true, - false, - false, - manualRedactionEntry.isRectangle()); - } - -} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ManualEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ManualEntity.java new file mode 100644 index 00000000..cffe0394 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ManualEntity.java @@ -0,0 +1,91 @@ 
+package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.util.List; +import java.util.PriorityQueue; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.experimental.FieldDefaults; + +@Getter +@Builder +@AllArgsConstructor +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public class ManualEntity implements Entity { + + // must be mapped into a TextEntity as is for comments to work correctly + String id; + String value; + List entityPosition; + String ruleIdentifier; + String reason; + String legalBasis; + String type; + String section; + EntityType entityType; + boolean applied; + boolean isDictionaryEntry; + boolean isDossierDictionaryEntry; + boolean rectangle; + + @Builder.Default + PriorityQueue matchedRuleList = new PriorityQueue<>(); + @Builder.Default + ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite(); + + + public static ManualEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) { + + String ruleIdentifier = redactionLogEntry.getType() + "." 
+ redactionLogEntry.getMatchedRule() + ".0"; + List rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); + return ManualEntity.builder() + .id(redactionLogEntry.getId()) + .value(redactionLogEntry.getValue()) + .entityPosition(rectangleWithPages) + .ruleIdentifier(ruleIdentifier) + .reason(redactionLogEntry.getReason()) + .legalBasis(redactionLogEntry.getLegalBasis()) + .type(redactionLogEntry.getType()) + .section(redactionLogEntry.getSection()) + .entityType(redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY) + .applied(redactionLogEntry.isRedacted()) + .isDictionaryEntry(redactionLogEntry.isDictionaryEntry()) + .isDossierDictionaryEntry(redactionLogEntry.isDossierDictionaryEntry()) + .rectangle(redactionLogEntry.isRectangle()) + .build(); + } + + + public static ManualEntity fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) { + + List rectangleWithPages = manualRedactionEntry.getPositions().stream().map(RectangleWithPage::fromAnnotationRectangle).toList(); + ManualChangeOverwrite manualChangeOverwrite = new ManualChangeOverwrite(); + manualChangeOverwrite.addChange(manualRedactionEntry); + return ManualEntity.builder() + .id(manualRedactionEntry.getAnnotationId()) + .value(manualRedactionEntry.getValue()) + .entityPosition(rectangleWithPages) + .ruleIdentifier("MAN.0.0") + .reason(manualRedactionEntry.getReason()) + .legalBasis(manualRedactionEntry.getLegalBasis()) + .type(manualRedactionEntry.getType()) + .section(manualRedactionEntry.getSection()) + .entityType(EntityType.ENTITY) + .applied(true) + .isDictionaryEntry(false) + .isDossierDictionaryEntry(false) + .rectangle(manualRedactionEntry.isRectangle()) + .manualOverwrite(manualChangeOverwrite) + .build(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java index b4dac375..0bb75596 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/Dictionary.java @@ -17,7 +17,7 @@ import org.apache.commons.lang3.StringUtils; import com.iqser.red.service.redaction.v1.server.exception.NotFoundException; import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; import lombok.Data; @@ -116,28 +116,28 @@ public class Dictionary { } - public void recommendEverywhere(RedactionEntity redactionEntity) { + public void recommendEverywhere(TextEntity textEntity) { - addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), false); + addLocalDictionaryEntry(textEntity.getType(), textEntity.getValue(), textEntity.getMatchedRuleList(), false); } - public void recommendEverywhereWithLastNameSeparately(RedactionEntity redactionEntity) { + public void recommendEverywhereWithLastNameSeparately(TextEntity textEntity) { - addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), true); + addLocalDictionaryEntry(textEntity.getType(), textEntity.getValue(), textEntity.getMatchedRuleList(), true); } - public void addMultipleAuthorsAsRecommendation(RedactionEntity redactionEntity) { + public void addMultipleAuthorsAsRecommendation(TextEntity textEntity) { - String cleanedWord = 
redactionEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; + String cleanedWord = textEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; Pattern pattern = Patterns.AUTHOR_TABLE_SPLITTER; Matcher matcher = pattern.matcher(cleanedWord); while (matcher.find()) { String match = matcher.group().trim(); if (match.length() >= 3) { - addLocalDictionaryEntry(redactionEntity.getType(), match, redactionEntity.getMatchedRuleList(), true); + addLocalDictionaryEntry(textEntity.getType(), match, textEntity.getMatchedRuleList(), true); } } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java index cae7949b..bd095b02 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/dictionary/SearchImplementation.java @@ -9,7 +9,7 @@ import java.util.stream.Collectors; import org.ahocorasick.trie.Trie; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import lombok.Data; @@ -83,29 +83,29 @@ public class SearchImplementation { } - public List getBoundaries(CharSequence text) { + public List getBoundaries(CharSequence text) { if (this.values.isEmpty()) { return new ArrayList<>(); } if (this.pattern != null) { - return this.pattern.matcher(text).results().map(r -> new Boundary(r.start(), r.end())).collect(Collectors.toList()); + return this.pattern.matcher(text).results().map(r -> new TextRange(r.start(), 
r.end())).collect(Collectors.toList()); } else { - return this.trie.parseText(text).stream().map(r -> new Boundary(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList()); + return this.trie.parseText(text).stream().map(r -> new TextRange(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList()); } } - public List getBoundaries(CharSequence text, Boundary region) { + public List getBoundaries(CharSequence text, TextRange region) { if (this.values.isEmpty()) { return new ArrayList<>(); } CharSequence subSequence = text.subSequence(region.start(), region.end()); if (this.pattern != null) { - return this.pattern.matcher(subSequence).results().map(r -> new Boundary(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList()); + return this.pattern.matcher(subSequence).results().map(r -> new TextRange(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList()); } else { - return this.trie.parseText(subSequence).stream().map(r -> new Boundary(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList()); + return this.trie.parseText(subSequence).stream().map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList()); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index 49a53192..276fd5a9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -16,6 +16,7 @@ import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.legalbasis.LegalBasis; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; @@ -29,7 +30,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; +import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrement; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion; @@ -61,6 +62,7 @@ public class AnalyzeService { RedactionServiceSettings redactionServiceSettings; ImportedRedactionService importedRedactionService; SectionFinderService sectionFinderService; + ManualRedactionEntryService manualRedactionEntryService; FunctionTimerValues redactmanagerAnalyzePagewiseValues; @@ -82,7 +84,7 @@ public class AnalyzeService { var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId()); log.info("Updated Rules to Version {} for file {} in dossier {}", wrapper.rulesVersion(), 
analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - List notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document); + List notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document); entityRedactionService.addDictionaryEntities(dictionary, document); log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); @@ -90,7 +92,10 @@ public class AnalyzeService { Set addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, wrapper.container(), analyzeRequest, nerEntities); log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - List redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries); + List redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, + analyzeRequest.getDossierTemplateId(), + notFoundManualRedactionEntries, + getComments(analyzeRequest)); List legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); RedactionLog redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), @@ -109,24 +114,19 @@ public class AnalyzeService { true); redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries); - return finalizeAnalysis(analyzeRequest, - startTime, - redactionLog, - document.getNumberOfPages(), - dictionary.getVersion(), - false, - addedFileAttributes); + return finalizeAnalysis(analyzeRequest, startTime, redactionLog, document.getNumberOfPages(), dictionary.getVersion(), false, addedFileAttributes); } - private List addManualRedactionEntriesAndReturnNotFoundEntries(AnalyzeRequest analyzeRequest, Document document) { + private static Map> getComments(AnalyzeRequest analyzeRequest) { - List 
notFoundManualRedactionEntries = Collections.emptyList(); - if (analyzeRequest.getManualRedactions() != null) { - notFoundManualRedactionEntries = entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document); - log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + if (analyzeRequest.getManualRedactions() == null) { + return Collections.emptyMap(); } - return notFoundManualRedactionEntries; + if (analyzeRequest.getManualRedactions().getComments() == null) { + return Collections.emptyMap(); + } + return analyzeRequest.getManualRedactions().getComments(); } @@ -170,7 +170,7 @@ public class AnalyzeService { var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId()); log.info("Updated Rules to version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - List notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document); + List notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document); Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); @@ -186,7 +186,10 @@ public class AnalyzeService { nerEntities); log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - List newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries); + List newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, + analyzeRequest.getDossierTemplateId(), + 
notFoundManualRedactionEntries, + getComments(analyzeRequest)); var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index d3e682a4..d58ab3dd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -36,7 +36,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.model.KieWrapper; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; @@ -84,11 +84,11 @@ public class DroolsExecutionService { KieSession kieSession = kieContainer.newKieSession(); EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession); - ManualRedactionApplicationService manualRedactionApplicationService = new 
ManualRedactionApplicationService(entityCreationService); + ManualChangesApplicationService manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService); kieSession.setGlobal("document", document); kieSession.setGlobal("entityCreationService", entityCreationService); - kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService); + kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); kieSession.setGlobal("dictionary", dictionary); kieSession.insert(document); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 765022c3..c58fe971 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -9,16 +9,14 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; -import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import 
com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation; @@ -74,12 +72,6 @@ public class EntityRedactionService { return allFileAttributes.stream().filter(fileAttribute -> !analyzeRequest.getFileAttributes().contains(fileAttribute)).collect(Collectors.toUnmodifiableSet()); } - public List addManualAddRedactionEntities(Set manualRedactionEntries, Document document) { - - // Entities are automatically added to the DocumentGraph and don't need to be inserted again. 
- return customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(manualRedactionEntries, document); - } - public void addDictionaryEntities(Dictionary dictionary, SemanticNode node) { @@ -98,7 +90,7 @@ public class EntityRedactionService { boolean isDossierDictionaryEntry) { EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary()) + searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) .stream() .filter(boundary -> entityCreationService.isValidEntityBoundary(node.getTextBlock(), boundary)) .map(bounds -> entityCreationService.forceByBoundary(bounds, type, entityType, node)) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java new file mode 100644 index 00000000..b89409de --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java @@ -0,0 +1,43 @@ +package com.iqser.red.service.redaction.v1.server.redaction.service; + +import java.util.List; + +import org.springframework.stereotype.Service; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; + +@Service +public class ManualChangeFactory { + + public List toManualChangeList(List manualChanges, boolean isHint) { + + return manualChanges.stream().map(baseAnnotation -> toManualChange(baseAnnotation, isHint)).toList(); + } + + + private ManualChange toManualChange(BaseAnnotation baseAnnotation, boolean isHint) { + + ManualChange manualChange = ManualChange.from(baseAnnotation); + if (baseAnnotation instanceof ManualImageRecategorization imageRecategorization) { + manualChange.withManualRedactionType(ManualRedactionType.RECATEGORIZE).withChange("type", imageRecategorization.getType()); + } else if (baseAnnotation instanceof IdRemoval manualRemoval) { + manualChange.withManualRedactionType(manualRemoval.isRemoveFromDictionary() ? ManualRedactionType.REMOVE_FROM_DICTIONARY : ManualRedactionType.REMOVE_LOCALLY); + } else if (baseAnnotation instanceof ManualForceRedaction) { + manualChange.withManualRedactionType(isHint ? ManualRedactionType.FORCE_HINT : ManualRedactionType.FORCE_REDACT); + } else if (baseAnnotation instanceof ManualResizeRedaction manualResizeRedact) { + manualChange.withManualRedactionType(ManualRedactionType.RESIZE).withChange("value", manualResizeRedact.getValue()); + } else if (baseAnnotation instanceof ManualRedactionEntry manualRedactionEntry) { + manualChange.withManualRedactionType(manualRedactionEntry.isAddToDictionary() ? 
ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY) + .withChange("value", manualRedactionEntry.getValue()); + } + return manualChange; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionEntryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionEntryService.java new file mode 100644 index 00000000..632ce8f5 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionEntryService.java @@ -0,0 +1,58 @@ +package com.iqser.red.service.redaction.v1.server.redaction.service; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import org.springframework.stereotype.Service; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; +import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter; +import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class ManualRedactionEntryService { + + private final CustomEntityCreationAdapter customEntityCreationAdapter; + + + public List addManualRedactionEntriesAndReturnNotFoundEntries(AnalyzeRequest analyzeRequest, Document document) { + + List notFoundManualRedactionEntries = Collections.emptyList(); + if 
(analyzeRequest.getManualRedactions() != null) { + notFoundManualRedactionEntries = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(analyzeRequest.getManualRedactions() + .getEntriesToAdd(), document); + log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + } + if (notFoundManualRedactionEntries.isEmpty()) { + return Collections.emptyList(); + } + List manualChanges = allManualChangesExceptAdd(analyzeRequest.getManualRedactions()); + for (ManualEntity notFoundManualRedactionEntry : notFoundManualRedactionEntries) { + manualChanges.stream() + .filter(change -> change.getAnnotationId().equals(notFoundManualRedactionEntry.getId())) + .forEach(change -> notFoundManualRedactionEntry.getManualOverwrite().addChange(change)); + } + return notFoundManualRedactionEntries; + } + + + private List allManualChangesExceptAdd(ManualRedactions manualRedactions) { + + return Stream.of(manualRedactions.getForceRedactions(), + manualRedactions.getResizeRedactions(), + manualRedactions.getImageRecategorization(), + manualRedactions.getIdsToRemove(), + manualRedactions.getLegalBasisChanges()).flatMap(Collection::stream).map(baseAnnotation -> (BaseAnnotation) baseAnnotation).toList(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 3fb09d21..cb39094b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -6,22 +6,24 @@ 
import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import org.springframework.stereotype.Service; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogComment; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType; -import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; -import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage; +import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -32,49 +34,52 @@ import lombok.extern.slf4j.Slf4j; public class RedactionLogCreatorService { private final DictionaryService dictionaryService; + private final ManualChangeFactory 
manualChangeFactory; - public List createRedactionLog(Document document, String dossierTemplateId, List notFoundManualRedactionEntries) { + public List createRedactionLog(Document document, + String dossierTemplateId, + List notFoundManualRedactionEntries, + Map> comments) { List entries = new ArrayList<>(); - Set processedIds = new HashSet<>(); document.getEntities() .stream() .filter(RedactionLogCreatorService::isEntityOrRecommendationType) - .filter(entity -> !entity.isRemoved()) - .forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, processedIds, dossierTemplateId))); - document.streamAllImages().filter(image -> !image.isRemoved()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId))); - notFoundManualRedactionEntries.forEach(entityIdentifier -> entries.add(createRedactionLogEntry(entityIdentifier, dossierTemplateId))); + .filter(entity -> !entity.removed()) + .forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, dossierTemplateId, comments))); + document.streamAllImages().filter(image -> !image.removed()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId, comments))); + notFoundManualRedactionEntries.forEach(entityIdentifier -> entries.add(createRedactionLogEntry(entityIdentifier, dossierTemplateId, comments))); return entries; } - private static boolean isEntityOrRecommendationType(RedactionEntity redactionEntity) { + private static boolean isEntityOrRecommendationType(TextEntity textEntity) { - return redactionEntity.getEntityType() == EntityType.ENTITY || redactionEntity.getEntityType() == EntityType.RECOMMENDATION; + return textEntity.getEntityType() == EntityType.ENTITY || textEntity.getEntityType() == EntityType.RECOMMENDATION; } - private List toRedactionLogEntries(RedactionEntity redactionEntity, Set processedIds, String dossierTemplateId) { + private List toRedactionLogEntries(TextEntity textEntity, String dossierTemplateId, Map> comments) { + 
Set processedIds = new HashSet<>(); List redactionLogEntities = new ArrayList<>(); - // Duplicates can exist due table extraction columns over multiple rows. + for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) { - for (RedactionPosition redactionPosition : redactionEntity.getRedactionPositionsPerPage()) { - - RedactionLogEntry redactionLogEntry = createRedactionLogEntry(redactionEntity, dossierTemplateId); - - if (processedIds.contains(redactionPosition.getId())) { + // Duplicates should be removed. They might exist due to table extraction duplicating cells spanning multiple columns/rows. + if (processedIds.contains(positionOnPage.getId())) { continue; } + processedIds.add(positionOnPage.getId()); - processedIds.add(redactionPosition.getId()); - redactionLogEntry.setId(redactionPosition.getId()); + RedactionLogEntry redactionLogEntry = createRedactionLogEntry(textEntity, dossierTemplateId); + redactionLogEntry.setId(positionOnPage.getId()); + redactionLogEntry.setComments(buildRedactionLogComments(comments, positionOnPage.getId())); - List rectanglesPerLine = redactionPosition.getRectanglePerLine() + List rectanglesPerLine = positionOnPage.getRectanglePerLine() .stream() - .map(rectangle2D -> RectangleTransformations.toRedactionLogRectangle(rectangle2D, redactionPosition.getPage().getNumber())) + .map(rectangle2D -> toRedactionLogRectangle(rectangle2D, positionOnPage.getPage().getNumber())) .toList(); redactionLogEntry.setPositions(rectanglesPerLine); @@ -85,88 +90,128 @@ public class RedactionLogCreatorService { } - private RedactionLogEntry createRedactionLogEntry(RedactionEntity entity, String dossierTemplateId) { + private List buildRedactionLogComments(Map> commentsPerId, String id) { + + if (!commentsPerId.containsKey(id)) { + return Collections.emptyList(); + } + List comments = commentsPerId.get(id); + if (comments == null || comments.isEmpty()) { + return Collections.emptyList(); + } + return toRedactionLogComments(comments); + } 
+ + + private List toRedactionLogComments(List comments) { + + return comments.stream().map(this::toRedactionLogComment).toList(); + } + + + private RedactionLogComment toRedactionLogComment(Comment comment) { + + return new RedactionLogComment(comment.getId(), + comment.getUser(), + comment.getText(), + comment.getAnnotationId(), + comment.getFileId(), + comment.getDate(), + comment.getSoftDeletedTime()); + } + + + private RedactionLogEntry createRedactionLogEntry(TextEntity entity, String dossierTemplateId) { Set referenceIds = new HashSet<>(); - entity.getReferences().stream().filter(RedactionEntity::isActive).forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId()))); + entity.references().stream().filter(TextEntity::active).forEach(ref -> ref.getPositionsOnPagePerPage().forEach(pos -> referenceIds.add(pos.getId()))); int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 0 : entity.getDeepestFullyContainingNode().getTreeId().get(0); - + boolean isHint = isHint(entity.getType(), dossierTemplateId); return RedactionLogEntry.builder() - .color(getColor(entity.getType(), dossierTemplateId, entity.isApplied())) - .reason(entity.getMatchedRule().getReason()) - .legalBasis(entity.getMatchedRule().getLegalBasis()) - .value(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue()) + .color(getColor(entity.getType(), dossierTemplateId, entity.applied())) + .reason(entity.buildReasonWithManualChangeDescriptions()) + .legalBasis(entity.legalBasis()) + .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? 
entity.getValueWithLineBreaks() : entity.getValue())) .type(entity.getType()) - .redacted(entity.isApplied()) - .isHint(isHint(entity.getType(), dossierTemplateId)) + .redacted(entity.applied()) + .isHint(isHint) .isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION)) .isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) - .section(entity.getDeepestFullyContainingNode().toString()) + .section(entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString())) .sectionNumber(sectionNumber) .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString()) .isDictionaryEntry(entity.isDictionaryEntry()) .textAfter(entity.getTextAfter()) .textBefore(entity.getTextBefore()) - .startOffset(entity.getBoundary().start()) - .endOffset(entity.getBoundary().end()) + .startOffset(entity.getTextRange().start()) + .endOffset(entity.getTextRange().end()) .isDossierDictionaryEntry(entity.isDossierDictionaryEntry()) .engines(entity.getEngines() != null ? 
entity.getEngines() : Collections.emptySet()) .reference(referenceIds) + .manualChanges(manualChangeFactory.toManualChangeList(entity.getManualOverwrite().getManualChangeLog(), isHint)) .build(); } - public RedactionLogEntry createRedactionLogEntry(EntityIdentifier entityIdentifier, String dossierTemplateId) { - List pageNumbers = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::pageNumber).toList(); - List rectanglesPerLine = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList(); + + public RedactionLogEntry createRedactionLogEntry(ManualEntity manualEntity, String dossierTemplateId, Map> comments) { + + String type = manualEntity.getManualOverwrite().getType().orElse(manualEntity.getType()); + boolean isHint = isHint(type, dossierTemplateId); return RedactionLogEntry.builder() - .id(entityIdentifier.getId()) - .color(getColor(entityIdentifier.getType(), dossierTemplateId, entityIdentifier.isApplied())) - .reason(entityIdentifier.getReason()) - .legalBasis(entityIdentifier.getLegalBasis()) - .value(entityIdentifier.getValue()) - .type(entityIdentifier.getType()) - .redacted(entityIdentifier.isApplied()) - .isHint(isHint(entityIdentifier.getType(), dossierTemplateId)) - .isRecommendation(entityIdentifier.getEntityType().equals(EntityType.RECOMMENDATION)) - .isFalsePositive(entityIdentifier.getEntityType().equals(EntityType.FALSE_POSITIVE) || entityIdentifier.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) - .section(entityIdentifier.getSection()) + .id(manualEntity.getId()) + .color(getColor(type, dossierTemplateId, manualEntity.applied())) + .reason(manualEntity.buildReasonWithManualChangeDescriptions()) + .legalBasis(manualEntity.legalBasis()) + .value(manualEntity.getManualOverwrite().getValue().orElse(manualEntity.getValue())) + .type(type) + .redacted(manualEntity.applied()) + .isHint(isHint) + .isRecommendation(manualEntity.getEntityType().equals(EntityType.RECOMMENDATION)) + 
.isFalsePositive(manualEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) || manualEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) + .section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection())) .sectionNumber(0) .matchedRule("ManualRedaction") - .rectangle(entityIdentifier.isRectangle()) - .isDictionaryEntry(entityIdentifier.isDictionaryEntry()) + .rectangle(manualEntity.isRectangle()) + .isDictionaryEntry(manualEntity.isDictionaryEntry()) + .isDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry()) .textAfter("") .textBefore("") .startOffset(-1) .endOffset(-1) - .isDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry()) - .positions(entityIdentifier.getEntityPosition() + .positions(manualEntity.getEntityPosition() .stream() - .map(entityPosition -> RectangleTransformations.toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber())) + .map(entityPosition -> toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber())) .collect(Collectors.toList())) .engines(Collections.emptySet()) .reference(Collections.emptySet()) + .manualChanges(manualChangeFactory.toManualChangeList(manualEntity.getManualOverwrite().getManualChangeLog(), isHint)) + .comments(buildRedactionLogComments(comments, manualEntity.getId())) .build(); } - public RedactionLogEntry createRedactionLogEntry(Image image, String dossierTemplateId) { - String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ROOT); + public RedactionLogEntry createRedactionLogEntry(Image image, String dossierTemplateId, Map> comments) { + + String imageType = image.getImageType().equals(ImageType.OTHER) ? 
"image" : image.getImageType().toString().toLowerCase(Locale.ENGLISH); + boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId); return RedactionLogEntry.builder() .id(image.getId()) - .color(getColor(image.getImageType().toString().toLowerCase(Locale.ROOT), dossierTemplateId, image.isApplied())) + .color(getColor(imageType, dossierTemplateId, image.applied())) .isImage(true) .type(imageType) - .redacted(image.isApplied()) - .reason(image.getMatchedRule().getReason()) - .legalBasis(image.getMatchedRule().getLegalBasis()) + .redacted(image.applied()) + .reason(image.buildReasonWithManualChangeDescriptions()) + .legalBasis(image.legalBasis()) .matchedRule(image.getMatchedRule().getRuleIdentifier().toString()) - .isHint(dictionaryService.isHint(image.getImageType().toString().toLowerCase(Locale.ROOT), dossierTemplateId)) + .isHint(isHint) .isDictionaryEntry(false) .isRecommendation(false) - .positions(List.of(RectangleTransformations.toRedactionLogRectangle(image.getPosition(), image.getPage().getNumber()))) + .positions(List.of(toRedactionLogRectangle(image.getPosition(), image.getPage().getNumber()))) .sectionNumber(image.getTreeId().get(0)) - .section(image.getParent().toString()) + .section(image.getManualOverwrite().getSection().orElse(image.getParent().toString())) .imageHasTransparency(image.isTransparent()) + .manualChanges(manualChangeFactory.toManualChangeList(image.getManualOverwrite().getManualChangeLog(), isHint)) + .comments(buildRedactionLogComments(comments, image.getId())) .build(); } @@ -186,4 +231,13 @@ public class RedactionLogCreatorService { return dictionaryService.isHint(type, dossierTemplateId); } + + private Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) { + + return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())), + (float) rectangle2D.getWidth(), + -(float) rectangle2D.getHeight(), + pageNumber); + } + } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java index 2d216334..c872bbe2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java @@ -3,7 +3,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils; import java.util.Set; import java.util.regex.Pattern; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock; import lombok.experimental.UtilityClass; @@ -14,7 +14,7 @@ import lombok.extern.slf4j.Slf4j; public final class SeparatorUtils { private final static Pattern punctuationPattern = Pattern.compile("\\p{Punct}"); - private final static Set quotes = Set.of('\'', '\u0022', '\u00AB', '\u00BB', '\u2018', '\u2019', '\u201A', '\u201C', '\u201D', '\u201E', '\u2039', '\u203A'); + private final static Set quotes = Set.of('\'', '"', '«', '»', '‘', '’', '‚', '“', '”', '„', '‹', '›'); private final static Set japaneseAltPunctuationMarks = Set.of(65288, 65289, 65294, 65339, 65341, 65371, 65373, 65375, 65376, 12443, 12444, 65309, 65306); @@ -32,9 +32,9 @@ public final class SeparatorUtils { } - public static boolean isWhiteSpacesOrSeparatorsOnly(TextBlock textBlock, Boundary boundary) { + public static boolean isWhiteSpacesOrSeparatorsOnly(TextBlock textBlock, TextRange textRange) { - String stringWithoutWhiteSpace = textBlock.subSequence(boundary).toString().replace(" ", ""); + String stringWithoutWhiteSpace = 
textBlock.subSequence(textRange).toString().replace(" ", ""); int numberOfSeparators = 0; for (int i = 0; i < stringWithoutWhiteSpace.length(); i++) { if (isSeparator(stringWithoutWhiteSpace.charAt(i))) { @@ -45,25 +45,25 @@ public final class SeparatorUtils { } - public static boolean boundaryIsSurroundedBySeparators(TextBlock textBlock, Boundary boundary) { + public static boolean boundaryIsSurroundedBySeparators(TextBlock textBlock, TextRange textRange) { - return validateStart(textBlock, boundary) && validateEnd(textBlock, boundary) && !isWhiteSpacesOrSeparatorsOnly(textBlock, boundary); + return validateStart(textBlock, textRange) && validateEnd(textBlock, textRange) && !isWhiteSpacesOrSeparatorsOnly(textBlock, textRange); } - private static boolean validateEnd(TextBlock textBlock, Boundary boundary) { + private static boolean validateEnd(TextBlock textBlock, TextRange textRange) { - return boundary.end() == textBlock.getBoundary().end() ||// - SeparatorUtils.isSeparator(textBlock.charAt(boundary.end())) ||// - SeparatorUtils.isJapaneseSeparator(textBlock.charAt(boundary.end() - 1)); + return textRange.end() == textBlock.getTextRange().end() ||// + SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||// + SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1)); } - private static boolean validateStart(TextBlock textBlock, Boundary boundary) { + private static boolean validateStart(TextBlock textBlock, TextRange textRange) { - return boundary.start() == textBlock.getBoundary().start() ||// - SeparatorUtils.isSeparator(textBlock.charAt(boundary.start() - 1)) ||// - SeparatorUtils.isJapaneseSeparator(textBlock.charAt(boundary.start())); + return textRange.start() == textBlock.getTextRange().start() ||// + SeparatorUtils.isSeparator(textBlock.charAt(textRange.start() - 1)) ||// + SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.start())); } } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 0b4d807e..83af03a9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -5,10 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; -import static org.wildfly.common.Assert.assertTrue; -import java.awt.geom.Rectangle2D; -import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; @@ -19,7 +16,6 @@ import java.nio.file.Paths; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.util.ArrayList; -import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,7 +27,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; import org.springframework.boot.test.context.SpringBootTest; @@ -62,21 +57,13 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSON import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section; -import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; -import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.iqser.red.storage.commons.StorageAutoConfiguration; @@ -93,11 +80,6 @@ import lombok.SneakyThrows; public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { private static final String RULES = 
loadFromClassPath("drools/rules.drl"); - @Autowired - private EntityEnrichmentService entityEnrichmentService; - - @Autowired - private DroolsExecutionService droolsExecutionService; @Configuration @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class}) @@ -113,11 +95,6 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { } } - @BeforeEach - public void invalidateCaches() { - - // droolsExecutionService.invalidateKieContainerCache(); - } @BeforeEach @@ -234,7 +211,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { @Test public void titleExtraction() throws IOException { - AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf"); System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); System.out.println("Finished structure analysis"); @@ -393,7 +370,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { continue loop; } if (redactionLogEntry.getSectionNumber() == section.getTreeId().get(0)) { - String value = section.getTextBlock().subSequence(new Boundary(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())).toString(); + String value = section.getTextBlock().subSequence(new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())).toString(); if (redactionLogEntry.getValue().equalsIgnoreCase(value)) { correctFound++; } else { @@ -542,7 +519,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { List valuesInDocument = redactionLog.getRedactionLogEntry() .stream() .filter(e -> !e.isImage()) - .map(redactionLogEntry -> new Boundary(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())) + .map(redactionLogEntry -> new TextRange(redactionLogEntry.getStartOffset(), 
redactionLogEntry.getEndOffset())) .map(boundary -> documentGraph.getTextBlock().subSequence(boundary).toString()) .toList(); List valuesInRedactionLog = redactionLog.getRedactionLogEntry().stream().filter(e -> !e.isImage()).map(RedactionLogEntry::getValue).toList(); @@ -697,75 +674,6 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { } - @Test - @SneakyThrows - public void testManualResizeRedactionRemovesContainedEntities() { - - String filePath = "files/new/crafted document.pdf"; - AnalyzeRequest request = uploadFileToStorage(filePath); - analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); - AnalyzeResult result = analyzeService.analyze(request); - - String testEntityValue1 = "Desiree"; - String testEntityValue2 = "Melanie"; - RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count()); - assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count()); - - Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID)); - String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. 
Reference No 12345 Lorem ipsum."; - EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - RedactionEntity expandedEntity = entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().get(); - - String idToResize = redactionLog.getRedactionLogEntry() - .stream() - .filter(entry -> entry.getValue().equals(testEntityValue1)) - .max(Comparator.comparingInt(RedactionLogEntry::getStartOffset)) - .get() - .getId(); - List resizedPositions = expandedEntity.getRedactionPositionsPerPage() - .get(0) - .getRectanglePerLine() - .stream() - .map(rectangle2D -> toAnnotationRectangle(rectangle2D, 3)) - .toList(); - ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() - .annotationId(idToResize) - .value(expandedEntityKeyword) - .positions(resizedPositions) - .status(AnnotationStatus.APPROVED) - .build(); - - ManualRedactions manualRedactions = new ManualRedactions(); - manualRedactions.getResizeRedactions().add(manualResizeRedaction); - request.setManualRedactions(manualRedactions); - AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request); - - redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); - String annotatedFileName = Paths.get(filePath).getFileName().toString().replace(".pdf", "_annotated2.pdf"); - File tmpFile = Paths.get(OsUtils.getTemporaryDirectory(), annotatedFileName).toFile(); - AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); - try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) { - fileOutputStream.write(annotateResponse.getDocument()); - } - RedactionLogEntry resizedEntry = redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().get(); - assertTrue(resizedEntry.getChanges().get(resizedEntry.getChanges().size() - 
1).getType().equals(ChangeType.CHANGED)); - assertEquals(idToResize, resizedEntry.getId()); - assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count()); - assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.lastChangeIsRemoved()).count()); - } - - - private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) { - - return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle((float) rectangle2D.getMaxX(), - (float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(), - (float) rectangle2D.getWidth(), - -(float) rectangle2D.getHeight(), - pageNumber); - } - - @Test public void testTableRedactionWithCvTableService() throws IOException { @@ -967,71 +875,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { @Test - public void testManualRedaction() throws IOException { - - System.out.println("testManualRedaction"); - long start = System.currentTimeMillis(); - String pdfFile = "files/Minimal Examples/Single Table.pdf"; - - ManualRedactions manualRedactions = new ManualRedactions(); - - String manualAddId = UUID.randomUUID().toString(); - - Comment comment = Comment.builder().date(OffsetDateTime.now()).user("TEST_USER").text("This is a comment test").build(); - manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.DECLINED).build())); - manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Something") - .status(AnnotationStatus.APPROVED) - .build())); - - manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment)); - 
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment)); - manualRedactions.getComments().put(manualAddId, List.of(comment)); - - ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); - manualRedactionEntry.setAnnotationId(manualAddId); - manualRedactionEntry.setFileId("fileId"); - manualRedactionEntry.setStatus(AnnotationStatus.REQUESTED); - manualRedactionEntry.setType("name"); - manualRedactionEntry.setValue("O'Loughlin C.K."); - manualRedactionEntry.setReason("Manual Redaction"); - manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(), - Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); - - AnalyzeRequest request = uploadFileToStorage(pdfFile); - request.setManualRedactions(manualRedactions); - analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); - AnalyzeResult result = analyzeService.analyze(request); - - manualRedactions.getEntriesToAdd().add(manualRedactionEntry); - manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.APPROVED).build())); - manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Manual Legal Basis Change") - .status(AnnotationStatus.APPROVED) - .build()))); - - analyzeService.reanalyze(request); - - var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); - - AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); - - try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) { - fileOutputStream.write(annotateResponse.getDocument()); 
- } - long end = System.currentTimeMillis(); - - System.out.println("duration: " + (end - start)); - System.out.println("numberOfPages: " + result.getNumberOfPages()); - } - - - @Test - public void phantomCellsDocumentTest() throws IOException { + public void phantomCellsDocumentTest() { AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/Phantom Cells.pdf"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/annotate/AnnotationService.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/annotate/AnnotationService.java index 7f3d43d6..54dee8cc 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/annotate/AnnotationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/annotate/AnnotationService.java @@ -180,9 +180,6 @@ public class AnnotationService { private String createAnnotationContent(RedactionLogEntry redactionLogEntry) { - if (redactionLogEntry.isLocalManualRedaction()) { - return "\nManual Redaction\n\nIn Section : \"" + redactionLogEntry.getSection() + "\""; - } return redactionLogEntry.getType() + " \nRule " + redactionLogEntry.getMatchedRule() + " matched\n\n" + redactionLogEntry.getReason() + "\n\nLegal basis:" + redactionLogEntry.getLegalBasis() + "\n\nIn section: \"" + redactionLogEntry.getSection() + "\""; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/TextEntityTest.java similarity index 66% rename from 
redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java rename to redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/TextEntityTest.java index d8999dd0..f594a1a3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/TextEntityTest.java @@ -5,16 +5,16 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import org.junit.jupiter.api.Test; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; -public class RedactionEntityTest { +public class TextEntityTest { @Test public void testMatchedRule() { - RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY); + TextEntity entity = TextEntity.initialEntityNode(new TextRange(1, 100), "PII", EntityType.ENTITY); entity.skip("CBI.1.0", ""); entity.skip("CBI.2.0", ""); entity.skip("CBI.3.0", ""); @@ -25,24 +25,11 @@ public class RedactionEntityTest { } - @Test - public void testMatchedRuleWithManualRedaction() { - - RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY); - entity.skip("MAN.2.0", ""); - entity.skip("CBI.2.0", ""); - entity.skip("CBI.3.0", ""); - entity.skip("CBI.4.1", ""); - entity.skip("CBI.4.0", ""); - 
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("MAN.2.0"); - assertThat(entity.getMatchedRuleUnit()).isEqualTo(2); - } - @Test public void testMatchedRuleWithNonsense() { - RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY); + TextEntity entity = TextEntity.initialEntityNode(new TextRange(1, 100), "PII", EntityType.ENTITY); assertThrows(IllegalArgumentException.class, () -> { entity.skip("", ""); }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java deleted file mode 100644 index d7918ca3..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java +++ /dev/null @@ -1,85 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.document.graph; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.Collections; -import java.util.List; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class BoundaryTest { - - Boundary startBoundary; - - - @BeforeEach - void setUp() { - - startBoundary = new Boundary(10, 100); - } - - - @Test - void testContains() { - - assertTrue(startBoundary.contains(11)); - assertTrue(startBoundary.contains(50)); - assertFalse(startBoundary.contains(9)); - assertFalse(startBoundary.contains(100)); - assertFalse(startBoundary.contains(150)); - assertFalse(startBoundary.contains(-123)); - assertTrue(startBoundary.contains(new Boundary(11, 99))); - assertTrue(startBoundary.contains(new 
Boundary(10, 100))); - assertTrue(startBoundary.contains(new Boundary(11, 11))); - assertFalse(startBoundary.contains(9, 100)); - assertTrue(startBoundary.contains(100, 100)); - assertFalse(startBoundary.contains(100, 101)); - assertFalse(startBoundary.contains(150, 151)); - } - - - @Test - void testIntersects() { - - assertTrue(startBoundary.intersects(new Boundary(1, 11))); - assertTrue(startBoundary.intersects(new Boundary(11, 12))); - assertTrue(startBoundary.intersects(new Boundary(11, 100))); - assertFalse(startBoundary.intersects(new Boundary(100, 101))); - assertFalse(startBoundary.intersects(new Boundary(9, 10))); - assertFalse(startBoundary.intersects(new Boundary(0, 1))); - assertFalse(startBoundary.intersects(new Boundary(1000, 1001))); - assertTrue(startBoundary.intersects(new Boundary(99, 101))); - assertTrue(startBoundary.intersects(new Boundary(99, 101))); - assertTrue(startBoundary.intersects(new Boundary(9, 101))); - - } - - - @Test - void testSplit() { - - assertEquals(4, startBoundary.split(List.of(12, 40, 90)).size()); - assertEquals(List.of(new Boundary(10, 12), new Boundary(12, 40), new Boundary(40, 90), new Boundary(90, 100)), startBoundary.split(List.of(12, 40, 90))); - assertEquals(List.of(new Boundary(10, 40), new Boundary(40, 100)), startBoundary.split(List.of(40))); - assertEquals(1, startBoundary.split(Collections.emptyList()).size()); - assertEquals(1, startBoundary.split(List.of(startBoundary.start())).size()); - assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(Collections.singletonList(0))); - assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(Collections.singletonList(100))); - assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(List.of(12, 40, 100))); - } - - - @Test - void testCompareTo() { - - Boundary beforeBoundary = new Boundary(1, 8); - Boundary afterBoundary = new Boundary(101, 102); - assertEquals(-1, beforeBoundary.compareTo(startBoundary)); - assertEquals(1, 
afterBoundary.compareTo(startBoundary)); - } - -} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java index 96a70a1f..bc782b13 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentEntityInsertionIntegrationTest.java @@ -17,7 +17,7 @@ import org.mockito.MockitoAnnotations; import org.springframework.beans.factory.annotation.Autowired; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Headline; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType; @@ -67,22 +67,22 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra Document document = buildGraph("files/new/crafted document.pdf"); String type = "CBI_author"; - assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent()); - assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent()); + assertTrue(entityCreationService.byBoundary(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent()); + 
assertTrue(entityCreationService.byBoundary(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent()); assertEquals(1, document.getEntities().size()); - verify(kieSession, times(1)).insert(any(RedactionEntity.class)); + verify(kieSession, times(1)).insert(any(TextEntity.class)); } - private RedactionEntity createAndInsertEntity(Document document, String searchTerm) { + private TextEntity createAndInsertEntity(Document document, String searchTerm) { int start = document.getTextBlock().indexOf(searchTerm); assert start != -1; - Boundary boundary = new Boundary(start, start + searchTerm.length()); - RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY); - entityCreationService.addEntityToGraph(redactionEntity, document); - return redactionEntity; + TextRange textRange = new TextRange(start, start + searchTerm.length()); + TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY); + entityCreationService.addEntityToGraph(textEntity, document); + return textEntity; } @@ -91,18 +91,18 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra Document document = buildGraph("files/new/crafted document"); String searchTerm = "Clarissa"; - RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm); + TextEntity textEntity = createAndInsertEntity(document, searchTerm); - assertEquals("Expand to Hint ", redactionEntity.getTextBefore()); - assertEquals("’s Donut ←", redactionEntity.getTextAfter()); - assertEquals(searchTerm, redactionEntity.getValue()); + assertEquals("Expand to Hint ", textEntity.getTextBefore()); + assertEquals("’s Donut ←", textEntity.getTextAfter()); + assertEquals(searchTerm, textEntity.getValue()); assertEquals("Rule 5: Do not redact genitive CBI_authors (Entries based on Dict) ", - redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); - assertEquals(3, 
redactionEntity.getIntersectingNodes().size()); - assertEquals(5, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage()); - assertInstanceOf(Paragraph.class, redactionEntity.getDeepestFullyContainingNode()); + textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); + assertEquals(3, textEntity.getIntersectingNodes().size()); + assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage()); + assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity); + assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); } @@ -111,17 +111,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra Document document = buildGraph("files/new/crafted document"); String searchTerm = "Rule 39:"; - RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm); + TextEntity textEntity = createAndInsertEntity(document, searchTerm); - assertEquals("", redactionEntity.getTextBefore()); - assertEquals(" Purity Hint", redactionEntity.getTextAfter()); - assertEquals(searchTerm, redactionEntity.getValue()); - assertEquals("Rule 39: Purity Hint ", redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); - assertEquals(3, redactionEntity.getIntersectingNodes().size()); - assertEquals(6, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage()); - assertInstanceOf(Headline.class, redactionEntity.getDeepestFullyContainingNode()); + assertEquals("", textEntity.getTextBefore()); + assertEquals(" Purity Hint", textEntity.getTextAfter()); + assertEquals(searchTerm, textEntity.getValue()); + assertEquals("Rule 39: Purity Hint ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); + assertEquals(3, textEntity.getIntersectingNodes().size()); + assertEquals(6, textEntity.getDeepestFullyContainingNode().getNumberOnPage()); + 
assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity); + assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); } @@ -130,17 +130,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra Document document = buildGraph("files/new/crafted document"); String searchTerm = "1998"; - RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm); + TextEntity textEntity = createAndInsertEntity(document, searchTerm); - assertEquals("", redactionEntity.getTextBefore()); - assertEquals("", redactionEntity.getTextAfter()); - assertEquals(searchTerm, redactionEntity.getValue()); - assertEquals("Rule 6-11 (Authors Table) ", redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); - assertEquals(5, redactionEntity.getIntersectingNodes().size()); - assertEquals(15, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage()); - assertInstanceOf(TableCell.class, redactionEntity.getDeepestFullyContainingNode()); + assertEquals("", textEntity.getTextBefore()); + assertEquals("", textEntity.getTextAfter()); + assertEquals(searchTerm, textEntity.getValue()); + assertEquals("Rule 6-11 (Authors Table) ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); + assertEquals(5, textEntity.getIntersectingNodes().size()); + assertEquals(15, textEntity.getDeepestFullyContainingNode().getNumberOnPage()); + assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity); + assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); } @@ -212,19 +212,19 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06"); String searchTerm = "Cucurbit"; - RedactionEntity 
redactionEntity = createAndInsertEntity(document, searchTerm); + TextEntity textEntity = createAndInsertEntity(document, searchTerm); - assertEquals("except Cranberry; Vegetable, ", redactionEntity.getTextBefore()); - assertEquals(", Group 9;", redactionEntity.getTextAfter()); + assertEquals("except Cranberry; Vegetable, ", textEntity.getTextBefore()); + assertEquals(", Group 9;", textEntity.getTextAfter()); assertEquals("1.1.4 Evaluations carried out under other regulatory contexts ", - redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); - assertEquals(searchTerm, redactionEntity.getValue()); - assertEquals(3, redactionEntity.getIntersectingNodes().size()); - assertEquals(5, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage()); - assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10)); - assertInstanceOf(Paragraph.class, redactionEntity.getDeepestFullyContainingNode()); + textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); + assertEquals(searchTerm, textEntity.getValue()); + assertEquals(3, textEntity.getIntersectingNodes().size()); + assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage()); + assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10)); + assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity); + assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); } @@ -238,21 +238,21 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra start = document.getTextBlock().indexOf(searchTerm, start + 1); assert start != -1; - Boundary boundary = new Boundary(start, start + searchTerm.length()); - RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY); - 
entityCreationService.addEntityToGraph(redactionEntity, document); + TextRange textRange = new TextRange(start, start + searchTerm.length()); + TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY); + entityCreationService.addEntityToGraph(textEntity, document); - assertEquals("2.6.1 Summary of ", redactionEntity.getTextBefore()); - assertEquals(" and excretion in", redactionEntity.getTextAfter()); + assertEquals("2.6.1 Summary of ", textEntity.getTextBefore()); + assertEquals(" and excretion in", textEntity.getTextAfter()); assertEquals("2.6.1 Summary of absorption, distribution, metabolism and excretion in mammals ", - redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); - assertEquals(searchTerm, redactionEntity.getValue()); - assertEquals(3, redactionEntity.getIntersectingNodes().size()); - assertEquals(4, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage()); - assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33)); - assertInstanceOf(Headline.class, redactionEntity.getDeepestFullyContainingNode()); + textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); + assertEquals(searchTerm, textEntity.getValue()); + assertEquals(3, textEntity.getIntersectingNodes().size()); + assertEquals(4, textEntity.getDeepestFullyContainingNode().getNumberOnPage()); + assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33)); + assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity); + assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); } @@ -261,32 +261,32 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06"); String searchTerm = "N-deacetylation product"; - 
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm); + TextEntity textEntity = createAndInsertEntity(document, searchTerm); - assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", redactionEntity.getTextBefore()); - assertEquals(" of metabolite of", redactionEntity.getTextAfter()); - assertEquals(searchTerm, redactionEntity.getValue()); - assertEquals(4, redactionEntity.getIntersectingNodes().size()); + assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", textEntity.getTextBefore()); + assertEquals(" of metabolite of", textEntity.getTextAfter()); + assertEquals(searchTerm, textEntity.getValue()); + assertEquals(4, textEntity.getIntersectingNodes().size()); assertEquals("Table 2.7-1: List of substances and metabolites and related structural formula ", - redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); - assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54)); - assertEquals(26, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage()); + textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()); + assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54)); + assertEquals(26, textEntity.getDeepestFullyContainingNode().getNumberOnPage()); - assertInstanceOf(TableCell.class, redactionEntity.getDeepestFullyContainingNode()); + assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity); + assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); } // this might fail, if an entity with the same name exists twice in the deepest containing node - private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, RedactionEntity redactionEntity) { + private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, TextEntity 
textEntity) { - List<Integer> paragraphStart = redactionEntity.getIntersectingNodes().stream()// + List<Integer> paragraphStart = textEntity.getIntersectingNodes().stream()// .map(SemanticNode::getTextBlock)// - .map(textBlock -> textBlock.indexOf(searchTerm, redactionEntity.getDeepestFullyContainingNode().getBoundary().start()))// + .map(textBlock -> textBlock.indexOf(searchTerm, textEntity.getDeepestFullyContainingNode().getTextRange().start()))// .toList(); - paragraphStart.forEach(nodeStart -> assertEquals(redactionEntity.getBoundary().start(), nodeStart)); + paragraphStart.forEach(nodeStart -> assertEquals(textEntity.getTextRange().start(), nodeStart)); } @@ -296,17 +296,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra assert start != -1; - Boundary boundary = new Boundary(start, start + searchTerm.length()); - RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY); - entityCreationService.addEntityToGraph(redactionEntity, document); + TextRange textRange = new TextRange(start, start + searchTerm.length()); + TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY); + entityCreationService.addEntityToGraph(textEntity, document); Page pageNode = document.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow(); - assertEquals(redactionEntity.getValue(), searchTerm); - assertTrue(pageNode.getEntities().contains(redactionEntity)); - assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(redactionEntity))); - assertTrue(redactionEntity.getPages().contains(pageNode)); - assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity); - assertTrue(redactionEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(redactionEntity))); + assertEquals(textEntity.getValue(), searchTerm); +
assertTrue(pageNode.getEntities().contains(textEntity)); + assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(textEntity))); + assertTrue(textEntity.getPages().contains(pageNode)); + assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); + assertTrue(textEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(textEntity))); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java index b2a6fa01..7dcf7398 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java @@ -33,7 +33,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section; @@ -136,7 +136,7 @@ public class DocumentPerformanceIntegrationTest extends 
BuildDocumentIntegration Dictionary dictionary = dictionaryService.getDeepCopyDictionary(TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID); long dictionarySearchStart = System.currentTimeMillis(); - List<RedactionEntity> foundEntities = new LinkedList<>(); + List<TextEntity> foundEntities = new LinkedList<>(); for (DictionaryModel model : dictionary.getDictionaryModels()) { findEntitiesWithSearchImplementation(document, model.getEntriesSearch(), EntityType.ENTITY, foundEntities, model.getType()); findEntitiesWithSearchImplementation(document, model.getFalsePositiveSearch(), EntityType.FALSE_POSITIVE, foundEntities, model.getType()); @@ -210,7 +210,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration float totalSearchTime = 0; float totalGraphTime = 0; float totalInsertTime = 0; - List<RedactionEntity> foundEntities = new LinkedList<>(); + List<TextEntity> foundEntities = new LinkedList<>(); for (int i = 0; i < numberOfRuns; i++) { foundEntities = new LinkedList<>(); @@ -256,9 +256,9 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration for (Page page : document.getPages()) { List entityPositionsOnPage = page.getEntities() .stream() - .filter(entityNode -> !entityNode.isRemoved()) - .filter(RedactionEntity::isApplied) - .flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream()) + .filter(entityNode -> !entityNode.removed()) + .filter(TextEntity::applied) + .flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream()) .filter(entityPosition -> entityPosition.getPage().equals(page)) .flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream()) .toList(); @@ -270,9 +270,9 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration for (Page page : document.getPages()) { List entityPositionsOnPage = page.getEntities() .stream() - .filter(entityNode -> !entityNode.isRemoved()) - .filter(entityNode -> !entityNode.isApplied()) - .flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream()) +
.filter(entityNode -> !entityNode.removed()) + .filter(entityNode -> !entityNode.applied()) + .flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream()) .filter(entityPosition -> entityPosition.getPage().equals(page)) .flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream()) .toList(); @@ -289,14 +289,14 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration private void findEntitiesWithSearchImplementation(Document document, SearchImplementation searchImplementation, EntityType entityType, - List<RedactionEntity> foundEntities, + List<TextEntity> foundEntities, String type) { TextBlock textBlock = document.getTextBlock(); - searchImplementation.getBoundaries(textBlock, textBlock.getBoundary()) + searchImplementation.getBoundaries(textBlock, textBlock.getTextRange()) .stream() .filter(boundary -> boundaryIsSurroundedBySeparators(textBlock, boundary)) - .map(bounds -> RedactionEntity.initialEntityNode(bounds, type, entityType)) + .map(bounds -> TextEntity.initialEntityNode(bounds, type, entityType)) .forEach(foundEntities::add); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java index 88826cfc..60c9d715 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.when; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -27,7 +28,7 @@ import
com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; +import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity; import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.tenantcommons.TenantContext; @@ -88,9 +89,9 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest { // IMPORTANT: always use the graph which is mapped from the DocumentData, since rounding errors occur during storage. Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(request.getDossierId(), request.getFileId())); - List<EntityIdentifier> notFoundManualRedactionEntries = redactionLogAdapter.toRedactionEntity(originalRedactionLog, document); + List<ManualEntity> notFoundManualRedactionEntries = redactionLogAdapter.toRedactionEntity(originalRedactionLog, document); - var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries); + var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries, Collections.emptyMap()); Map migratedIds = migratedRedactionLogEntries.stream().collect(toMap(RedactionLogEntry::getId, Functions.identity())); Map newIds = newRedactionLog.getRedactionLogEntry().stream().collect(toMap(RedactionLogEntry::getId, Functions.identity())); diff --git
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java index 047037fc..1d9499d0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java @@ -8,7 +8,7 @@ import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; @@ -27,7 +27,7 @@ public class SearchImplementationTest extends BuildDocumentIntegrationTest { SearchImplementation searchImplementation = new SearchImplementation(List.of("mydossierredaction"), true); EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - List<RedactionEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList(); + List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList(); assertEquals(2, entities.size()); } diff --git
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TextRangeTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TextRangeTest.java
new file mode 100644
index 00000000..9c464d43
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TextRangeTest.java
@@ -0,0 +1,95 @@
+package com.iqser.red.service.redaction.v1.server.document.graph;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link TextRange}. Every test runs against the fixture range
+ * {@code new TextRange(10, 100)} that is created in {@link #setUp()}.
+ */
+class TextRangeTest {
+
+    /** Fixture range shared by all tests; re-created before each test. */
+    TextRange startTextRange;
+
+
+    /** Creates the fixture range spanning the indices 10 to 100. */
+    @BeforeEach
+    void setUp() {
+
+        startTextRange = new TextRange(10, 100);
+    }
+
+
+    /** Checks containment of single indices, of other ranges, and of explicit start/end pairs. */
+    @Test
+    void testContains() {
+
+        assertTrue(startTextRange.contains(11));
+        assertTrue(startTextRange.contains(50));
+        assertFalse(startTextRange.contains(9));
+        assertFalse(startTextRange.contains(100)); // the end index itself is expected to be outside
+        assertFalse(startTextRange.contains(150));
+        assertFalse(startTextRange.contains(-123));
+        assertTrue(startTextRange.contains(new TextRange(11, 99)));
+        assertTrue(startTextRange.contains(new TextRange(10, 100))); // a range is expected to contain itself
+        assertTrue(startTextRange.contains(new TextRange(11, 11)));
+        assertFalse(startTextRange.contains(9, 100));
+        assertTrue(startTextRange.contains(100, 100)); // an empty range at the end is expected to be contained
+        assertFalse(startTextRange.contains(100, 101));
+        assertFalse(startTextRange.contains(150, 151));
+    }
+
+
+    /** Checks overlapping, merely touching, disjoint, contained and enclosing ranges. */
+    @Test
+    void testIntersects() {
+
+        assertTrue(startTextRange.intersects(new TextRange(1, 11)));
+        assertTrue(startTextRange.intersects(new TextRange(11, 12)));
+        assertTrue(startTextRange.intersects(new TextRange(11, 100)));
+        assertFalse(startTextRange.intersects(new TextRange(100, 101))); // only touches the end
+        assertFalse(startTextRange.intersects(new TextRange(9, 10))); // only touches the start
+        assertFalse(startTextRange.intersects(new TextRange(0, 1)));
+        assertFalse(startTextRange.intersects(new TextRange(1000, 1001)));
+        assertTrue(startTextRange.intersects(new TextRange(99, 101)));
+        assertTrue(startTextRange.intersects(new TextRange(50, 60))); // strictly inside the fixture
+        assertTrue(startTextRange.intersects(new TextRange(9, 101))); // encloses the fixture
+
+    }
+
+
+    /** Checks splitting at valid indices and the rejection of indices outside the range. */
+    @Test
+    void testSplit() {
+
+        assertEquals(4, startTextRange.split(List.of(12, 40, 90)).size());
+        assertEquals(List.of(new TextRange(10, 12), new TextRange(12, 40), new TextRange(40, 90), new TextRange(90, 100)), startTextRange.split(List.of(12, 40, 90)));
+        assertEquals(List.of(new TextRange(10, 40), new TextRange(40, 100)), startTextRange.split(List.of(40)));
+        assertEquals(1, startTextRange.split(Collections.emptyList()).size());
+        assertEquals(1, startTextRange.split(List.of(startTextRange.start())).size()); // splitting at the start yields no extra part
+        assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(Collections.singletonList(0)));
+        assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(Collections.singletonList(100))); // the end index is rejected
+        assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(List.of(12, 40, 100)));
+    }
+
+
+    /** Checks the ordering of ranges that lie completely before or after the fixture. */
+    @Test
+    void testCompareTo() {
+
+        TextRange beforeTextRange = new TextRange(1, 8);
+        TextRange afterTextRange = new TextRange(101, 102);
+        assertEquals(-1, beforeTextRange.compareTo(startTextRange));
+        assertEquals(1, afterTextRange.compareTo(startTextRange));
+    }
+
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualRedactionEntryTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/CustomEntityCreationAdapterTest.java
similarity index 81%
rename from
redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualRedactionEntryTest.java rename to redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/CustomEntityCreationAdapterTest.java index 13f3674e..ce22183a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualRedactionEntryTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/CustomEntityCreationAdapterTest.java @@ -1,4 +1,4 @@ -package com.iqser.red.service.redaction.v1.server.document.graph; +package com.iqser.red.service.redaction.v1.server.manualchanges; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.when; @@ -6,6 +6,7 @@ import static org.wildfly.common.Assert.assertFalse; import static org.wildfly.common.Assert.assertTrue; import java.awt.geom.Rectangle2D; +import java.util.Collections; import java.util.List; import java.util.Set; @@ -19,19 +20,20 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import 
com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier; +import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity; import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService; import lombok.SneakyThrows; -public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest { +public class CustomEntityCreationAdapterTest extends BuildDocumentIntegrationTest { @Autowired private EntityEnrichmentService entityEnrichmentService; @@ -48,7 +50,7 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest { @BeforeEach public void stubMethods() { - + MockitoAnnotations.openMocks(this); when(dictionaryService.getColor(DICTIONARY_AUTHOR, TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new float[]{0f, 0f, 0f}); } @@ -61,10 +63,10 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest { Document document = buildGraph("files/new/VV-919901.pdf"); EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - List tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList(); + List tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList(); assertFalse(tempEntities.isEmpty()); var tempEntity = tempEntities.get(0); - List positions = tempEntity.getRedactionPositionsPerPage() + List positions = tempEntity.getPositionsOnPagePerPage() .stream() .flatMap(redactionPosition -> redactionPosition.getRectanglePerLine() .stream() @@ -86,9 +88,8 @@ public class 
ManualRedactionEntryTest extends BuildDocumentIntegrationTest { tempEntity.removeFromGraph(); assertTrue(document.getEntities().isEmpty()); - List notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), - document); - assertTrue(notFoundEntityIdentifiers.isEmpty()); + List notFoundManualEntities = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), document); + assertTrue(notFoundManualEntities.isEmpty()); assertEquals(1, document.getEntities().size()); } @@ -115,12 +116,14 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest { assertTrue(document.getEntities().isEmpty()); - List notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), - document); - assertEquals(1, notFoundEntityIdentifiers.size()); + List notFoundManualEntities = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), document); + assertEquals(1, notFoundManualEntities.size()); assertTrue(document.getEntities().isEmpty()); - List redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundEntityIdentifiers); + List redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, + TEST_DOSSIER_TEMPLATE_ID, + notFoundManualEntities, + Collections.emptyMap()); assertEquals(1, redactionLogEntries.size()); assertEquals(value, redactionLogEntries.get(0).getValue()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java new file mode 100644 index 00000000..4a5e073e --- /dev/null 
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java
@@ -0,0 +1,346 @@
+package com.iqser.red.service.redaction.v1.server.manualchanges;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.mockito.Mockito.when;
+
+import java.awt.geom.Rectangle2D;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.time.OffsetDateTime;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.FilterType;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.Primary;
+import org.springframework.test.context.junit.jupiter.SpringExtension;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
+import com.iqser.red.service.redaction.v1.server.AbstractRedactionIntegrationTest;
+import com.iqser.red.service.redaction.v1.server.Application;
+import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
+import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
+import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
+import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
+import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
+import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
+import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
+import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
+import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
+import com.iqser.red.storage.commons.StorageAutoConfiguration;
+import com.iqser.red.storage.commons.service.StorageService;
+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
+import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
+import com.knecon.fforesight.tenantcommons.TenantContext;
+
+import lombok.SneakyThrows;
+
+/**
+ * End-to-end tests that analyze real PDF files and then re-analyze them with manual changes
+ * (resize, add, remove, force, legal basis change, recategorize) set on the request.
+ */
+@ExtendWith(SpringExtension.class)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@Import(ManualChangesEnd2EndTest.TestConfiguration.class)
+public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
+
+    private static final String RULES = loadFromClassPath("drools/rules.drl");
+    @Autowired
+    private EntityEnrichmentService entityEnrichmentService;
+
+    private EntityCreationService entityCreationService;
+
+    /** Test configuration that registers a {@link FileSystemBackedStorageService} as the primary storage bean. */
+    @Configuration
+    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
+    @Import(LayoutParsingServiceProcessorConfiguration.class)
+    @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
+    public static class TestConfiguration {
+
+        @Bean
+        @Primary
+        public StorageService inmemoryStorage() {
+
+            return new FileSystemBackedStorageService();
+        }
+
+    }
+
+
+    /** Creates the entity creation service the tests use to build reference entities. */
+    @BeforeEach
+    public void createServices() {
+
+        entityCreationService = new EntityCreationService(entityEnrichmentService);
+    }
+
+
+    /** Sets the tenant and stubs the rules and dictionary clients for the test dossier (template). */
+    @BeforeEach
+    public void stubClients() {
+
+        TenantContext.setTenantId("redaction");
+
+        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis());
+        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
+
+        loadDictionaryForTest();
+        loadTypeForTest();
+        loadNerForTest();
+        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
+        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
+
+        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
+                .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
+                .type(DOSSIER_REDACTIONS_INDICATOR)
+                .dossierTemplateId(TEST_DOSSIER_ID) // NOTE(review): a dossier id is passed as dossierTemplateId - confirm this is intended
+                .hexColor("#ffe187")
+                .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .build()));
+
+        mockDictionaryCalls(null);
+
+        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
+    }
+
+
+    /**
+     * Resizes the "Desiree" entry with the highest start offset to a whole sentence and verifies that
+     * the resized entry keeps its id, has CHANGED as its last change, and that only one "Desiree"
+     * entry and one not-removed "Melanie" entry are left in the redaction log afterwards.
+     */
+    @Test
+    @SneakyThrows
+    public void testManualResizeRedactionRemovesContainedEntities() {
+
+        String filePath = "files/new/crafted document.pdf";
+        AnalyzeRequest request = uploadFileToStorage(filePath);
+        analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
+        analyzeService.analyze(request);
+
+        String testEntityValue1 = "Desiree";
+        String testEntityValue2 = "Melanie";
+        RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+        assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
+        assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count());
+
+        Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
+        String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum.";
+        TextEntity expandedEntity = entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().orElseThrow();
+
+        String idToResize = redactionLog.getRedactionLogEntry()
+                .stream()
+                .filter(entry -> entry.getValue().equals(testEntityValue1))
+                .max(Comparator.comparingInt(RedactionLogEntry::getStartOffset))
+                .orElseThrow()
+                .getId();
+        List<Rectangle> resizedPositions = expandedEntity.getPositionsOnPagePerPage()
+                .get(0)
+                .getRectanglePerLine()
+                .stream()
+                .map(rectangle2D -> toAnnotationRectangle(rectangle2D, 3))
+                .toList();
+        ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
+                .annotationId(idToResize)
+                .value(expandedEntityKeyword)
+                .positions(resizedPositions)
+                .status(AnnotationStatus.APPROVED)
+                .build();
+
+        ManualRedactions manualRedactions = new ManualRedactions();
+        manualRedactions.getResizeRedactions().add(manualResizeRedaction);
+        request.setManualRedactions(manualRedactions);
+        analyzeService.reanalyze(request);
+
+        redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+        String annotatedFileName = Paths.get(filePath).getFileName().toString().replace(".pdf", "_annotated2.pdf");
+        File tmpFile = Paths.get(OsUtils.getTemporaryDirectory(), annotatedFileName).toFile();
+        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
+        try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) {
+            fileOutputStream.write(annotateResponse.getDocument());
+        }
+        RedactionLogEntry resizedEntry = redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().orElseThrow();
+        assertEquals(ChangeType.CHANGED, resizedEntry.getChanges().get(resizedEntry.getChanges().size() - 1).getType());
+        assertEquals(idToResize, resizedEntry.getId());
+        assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
+        assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.lastChangeIsRemoved()).count());
+    }
+
+
+    /** Converts an AWT rectangle into an annotation {@link Rectangle} on the given page. */
+    private static Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
+
+        return new Rectangle((float) rectangle2D.getMaxX(),
+                (float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(),
+                (float) rectangle2D.getWidth(),
+                -(float) rectangle2D.getHeight(),
+                pageNumber);
+    }
+
+
+    /**
+     * Smoke test: analyzes a single-table PDF with an id removal, a force redaction and comments,
+     * re-analyzes it after adding a manual entry and a legal basis change, and writes the annotated
+     * PDF to the temporary directory. It contains no assertions and only fails if a step throws.
+     */
+    @Test
+    public void testManualRedaction() throws IOException {
+
+        System.out.println("testManualRedaction");
+        long start = System.currentTimeMillis();
+        String pdfFile = "files/Minimal Examples/Single Table.pdf";
+
+        ManualRedactions manualRedactions = new ManualRedactions();
+
+        String manualAddId = UUID.randomUUID().toString();
+
+        Comment comment = Comment.builder().date(OffsetDateTime.now()).user("TEST_USER").text("This is a comment test").build();
+        manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.DECLINED).build()));
+        manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder()
+                .annotationId("675eba69b0c2917de55462c817adaa05")
+                .fileId("fileId")
+                .legalBasis("Something")
+                .status(AnnotationStatus.APPROVED)
+                .build()));
+
+        manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment));
+        manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
+        manualRedactions.getComments().put(manualAddId, List.of(comment));
+
+        ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
+        manualRedactionEntry.setAnnotationId(manualAddId);
+        manualRedactionEntry.setFileId("fileId");
+        manualRedactionEntry.setStatus(AnnotationStatus.REQUESTED);
+        manualRedactionEntry.setType("name");
+        manualRedactionEntry.setValue("O'Loughlin C.K.");
+        manualRedactionEntry.setReason("Manual Redaction");
+        manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
+                Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
+
+        AnalyzeRequest request = uploadFileToStorage(pdfFile);
+        request.setManualRedactions(manualRedactions);
+        analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
+        AnalyzeResult result = analyzeService.analyze(request);
+
+        manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
+        manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.APPROVED).build()));
+        manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder()
+                .annotationId("675eba69b0c2917de55462c817adaa05")
+                .fileId("fileId")
+                .legalBasis("Manual Legal Basis Change")
+                .status(AnnotationStatus.APPROVED)
+                .build())));
+
+        analyzeService.reanalyze(request);
+
+        redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); // result unused; the call is kept from the original test
+
+        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
+
+        try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
+            fileOutputStream.write(annotateResponse.getDocument());
+        }
+        long end = System.currentTimeMillis();
+
+        System.out.println("duration: " + (end - start));
+        System.out.println("numberOfPages: " + result.getNumberOfPages());
+    }
+
+
+    /**
+     * Recategorizes the "Oxford University Press" entry from published_information to vertebrate and
+     * verifies that it is reported only under the new type, carrying exactly one manual change.
+     * <p>
+     * NOTE(review): the test name promises a change of the CBI_author entry "Asya Lyon", but that
+     * entry is only checked before the re-analysis; consider asserting its state afterwards as well.
+     */
+    @Test
+    public void testReCategorizeToVertebrateChangesCbiAuthor() {
+
+        String filePath = "files/new/crafted document.pdf";
+        AnalyzeRequest request = uploadFileToStorage(filePath);
+        analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
+        analyzeService.analyze(request);
+        RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+
+        var oxfordUniversityPress = redactionLog.getRedactionLogEntry()
+                .stream()
+                .filter(entry -> entry.getType().equals("published_information"))
+                .filter(entry -> entry.getValue().equals("Oxford University Press"))
+                .findFirst()
+                .orElseThrow();
+
+        var asyaLyon = redactionLog.getRedactionLogEntry()
+                .stream()
+                .filter(entry -> entry.getType().equals("CBI_author"))
+                .filter(entry -> entry.getValue().equals("Asya Lyon"))
+                .findFirst()
+                .orElseThrow();
+
+        assertEquals("CBI.3.2", asyaLyon.getMatchedRule());
+        assertEquals("No vertebrate found", asyaLyon.getReason());
+
+        ManualImageRecategorization recategorization = ManualImageRecategorization.builder()
+                .requestDate(OffsetDateTime.now())
+                .status(AnnotationStatus.APPROVED)
+                .type("vertebrate")
+                .annotationId(oxfordUniversityPress.getId())
+                .fileId(TEST_FILE_ID)
+                .build();
+
+        request.setManualRedactions(new ManualRedactions());
+        request.getManualRedactions().setImageRecategorization(Set.of(recategorization));
+
+        analyzeService.reanalyze(request);
+        RedactionLog redactionLog2 = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+        assertFalse(redactionLog2.getRedactionLogEntry()
+                .stream()
+                .filter(entry -> entry.getType().equals("published_information"))
+                .anyMatch(entry -> entry.getValue().equals("Oxford University Press")));
+
+        var oxfordUniversityPressRecategorized = redactionLog2.getRedactionLogEntry()
+                .stream()
+                .filter(entry -> entry.getType().equals("vertebrate"))
+                .filter(entry -> entry.getValue().equals("Oxford University Press"))
+                .findFirst()
+                .orElseThrow();
+
+        assertEquals(1, oxfordUniversityPressRecategorized.getManualChanges().size());
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java
similarity index 66%
rename from redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java
rename to redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java
index 8ec79895..07732060 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java
@@ -1,4 +1,4 @@
-package com.iqser.red.service.redaction.v1.server.document.graph;
+package com.iqser.red.service.redaction.v1.server.manualchanges;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -31,24 +31,25 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
 import
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest; import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Paragraph; import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; -@Import(ManualResizeRedactionIntegrationTest.TestConfiguration.class) -public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrationTest { +@Import(ManualChangesIntegrationTest.TestConfiguration.class) +public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { private static final String RULES = "drools/manual_redaction_rules.drl"; @Autowired private EntityEnrichmentService entityEnrichmentService; private EntityCreationService entityCreationService; - private ManualRedactionApplicationService manualRedactionApplicationService; + private ManualChangesApplicationService manualChangesApplicationService; @Qualifier("kieContainer") @Autowired @@ -79,7 +80,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati public void 
createServices() { entityCreationService = new EntityCreationService(entityEnrichmentService); - manualRedactionApplicationService = new ManualRedactionApplicationService(entityCreationService); + manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService); } @@ -87,23 +88,23 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati public void manualResizeRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); - Set biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document) + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document) .collect(Collectors.toUnmodifiableSet()); - RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - RedactionEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - String initialId = entity.getRedactionPositionsPerPage().get(0).getId(); + String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() .annotationId(initialId) .value(biggerEntity.getValue()) - 
.positions(toAnnotationRectangles(biggerEntity.getRedactionPositionsPerPage().get(0))) + .positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage().get(0))) .status(AnnotationStatus.APPROVED) .build(); KieSession kieSession = kieContainer.newKieSession(); - kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService); + kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); kieSession.insert(document); document.streamAllSubNodes().forEach(kieSession::insert); kieSession.insert(entity); @@ -111,14 +112,14 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati kieSession.fireAllRules(); kieSession.dispose(); - assertEquals(biggerEntity.getBoundary(), entity.getBoundary()); + assertEquals(biggerEntity.getTextRange(), entity.getTextRange()); assertEquals(biggerEntity.getDeepestFullyContainingNode(), entity.getDeepestFullyContainingNode()); assertEquals(biggerEntity.getIntersectingNodes(), entity.getIntersectingNodes()); assertEquals(biggerEntity.getPages(), entity.getPages()); assertEquals(biggerEntity.getValue(), entity.getValue()); - assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); - assertRectanglesAlmostEqual(biggerEntity.getRedactionPositionsPerPage().get(0).getRectanglePerLine(), entity.getRedactionPositionsPerPage().get(0).getRectanglePerLine()); - assertTrue(entity.isResized()); + assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage().get(0).getRectanglePerLine(), entity.getPositionsOnPagePerPage().get(0).getRectanglePerLine()); + assertTrue(entity.resized()); } @@ -126,11 +127,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati public void manualForceRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", 
"CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); - RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - String initialId = entity.getRedactionPositionsPerPage().get(0).getId(); + String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() .annotationId(initialId) .status(AnnotationStatus.APPROVED) @@ -139,7 +140,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati .build(); KieSession kieSession = kieContainer.newKieSession(); - kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService); + kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); kieSession.insert(entity); kieSession.insert(manualForceRedaction); kieSession.insert(document); @@ -151,11 +152,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals("Something", entity.getMatchedRule().getLegalBasis()); - assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); - assertFalse(entity.isRemoved()); - assertTrue(entity.isSkipRemoveEntitiesContainedInLarger()); - assertTrue(entity.isApplied()); + assertEquals("Something", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertFalse(entity.removed()); + 
assertTrue(entity.hasManualChanges()); + assertTrue(entity.applied()); } @@ -163,15 +164,15 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati public void manualIDRemovalTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); - RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - String initialId = entity.getRedactionPositionsPerPage().get(0).getId(); + String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build(); KieSession kieSession = kieContainer.newKieSession(); - kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService); + kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); kieSession.insert(document); document.streamAllSubNodes().forEach(kieSession::insert); kieSession.insert(entity); @@ -180,8 +181,8 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati kieSession.dispose(); assertEquals("David Ksenia", entity.getValue()); - assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); - assertTrue(entity.isIgnored()); + assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertTrue(entity.ignored()); } @@ -189,11 +190,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati public void 
manualIDRemovalButAlsoForceRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); - RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - String initialId = entity.getRedactionPositionsPerPage().get(0).getId(); + String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build(); ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() .annotationId(initialId) @@ -203,7 +204,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati .build(); KieSession kieSession = kieContainer.newKieSession(); - kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService); + kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); kieSession.insert(document); document.streamAllSubNodes().forEach(kieSession::insert); kieSession.insert(entity); @@ -216,9 +217,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); - assertFalse(entity.isRemoved()); - assertFalse(entity.isIgnored()); + assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + 
assertFalse(entity.removed()); + assertFalse(entity.ignored()); } @@ -226,15 +227,15 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati public void manualIDRemovalNotApprovedTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); - RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - String initialId = entity.getRedactionPositionsPerPage().get(0).getId(); + String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.REQUESTED).build(); KieSession kieSession = kieContainer.newKieSession(); - kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService); + kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); kieSession.insert(entity); kieSession.insert(idRemoval); kieSession.insert(document); @@ -246,8 +247,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); - assertFalse(entity.isRemoved()); + assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertFalse(entity.ignored()); + assertFalse(entity.removed()); } @@ -271,9 +273,9 @@ public class ManualResizeRedactionIntegrationTest 
extends BuildDocumentIntegrati } - private static List toAnnotationRectangles(RedactionPosition redactionPositions) { + private static List toAnnotationRectangles(PositionOnPage positionsOnPage) { - return redactionPositions.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, redactionPositions.getPage().getNumber())).toList(); + return positionsOnPage.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())).toList(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java new file mode 100644 index 00000000..00b71cee --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java @@ -0,0 +1,134 @@ +package com.iqser.red.service.redaction.v1.server.manualchanges; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.time.OffsetDateTime; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; +import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; +import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; + +public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { + + @Autowired + private EntityEnrichmentService entityEnrichmentService; + + private EntityCreationService entityCreationService; + + + @BeforeEach + public void createServices() { + + entityCreationService = new EntityCreationService(entityEnrichmentService); + } + + + @Test + public void testBasicOverrides() { + + OffsetDateTime start = OffsetDateTime.now(); + String reason = "whatever"; + Document document = buildGraphNoImages("files/new/crafted document.pdf"); + List entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document).peek(e -> e.apply("T.0.0", reason)).toList(); + assertFalse(entities.isEmpty()); + TextEntity entity = entities.get(0); + assertTrue(entity.active()); + assertTrue(entity.applied()); + assertFalse(entity.removed()); + assertFalse(entity.resized()); + assertFalse(entity.ignored()); + assertEquals("n-a", entity.getMatchedRule().getLegalBasis()); + String annotationId = entity.getPositionsOnPagePerPage().get(0).getId(); + + // remove first + IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).status(AnnotationStatus.APPROVED).build(); + entity.getManualOverwrite().addChange(removal); + assertTrue(entity.ignored()); + assertFalse(entity.applied()); + 
assertEquals(reason + ", removed by manual override", entity.buildReasonWithManualChangeDescriptions()); + + // force again + ManualForceRedaction forceRedaction = ManualForceRedaction.builder() + .requestDate(start.plusSeconds(1)) + .fileId(TEST_FILE_ID) + .annotationId(annotationId) + .legalBasis("coolio") + .status(AnnotationStatus.APPROVED) + .build(); + entity.getManualOverwrite().addChange(forceRedaction); + assertTrue(entity.applied()); + assertFalse(entity.ignored()); + assertFalse(entity.removed()); + assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions()); + assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); + + // remove again + IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).status(AnnotationStatus.APPROVED).build(); + entity.getManualOverwrite().addChange(removal2); + assertTrue(entity.ignored()); + assertFalse(entity.applied()); + assertEquals(reason + ", removed by manual override, forced by manual override, removed by manual override", entity.buildReasonWithManualChangeDescriptions()); + + // force again, with requestDate before removal2, but after force1 + ManualForceRedaction forceRedaction2 = ManualForceRedaction.builder() + .requestDate(start.plusSeconds(2)) + .fileId(TEST_FILE_ID) + .annotationId(annotationId) + .legalBasis("coolio") + .status(AnnotationStatus.APPROVED) + .build(); + entity.getManualOverwrite().addChange(forceRedaction2); + assertTrue(entity.ignored()); + assertFalse(entity.applied()); + assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override", + entity.buildReasonWithManualChangeDescriptions()); + + String legalBasis = "Yeah"; + String section = "Some random section!"; + String value = "Some random value!"; + ManualLegalBasisChange 
legalBasisChange = ManualLegalBasisChange.builder() + .legalBasis(legalBasis) + .annotationId(annotationId) + .requestDate(start.plusSeconds(4)) + .section(section) + .status(AnnotationStatus.APPROVED) + .user("peter") + .value(value) + .build(); + entity.getManualOverwrite().addChange(legalBasisChange); + assertTrue(entity.ignored()); + assertFalse(entity.applied()); + assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override, legal basis was manually changed", + entity.buildReasonWithManualChangeDescriptions()); + assertEquals(value, entity.getManualOverwrite().getValue().orElse(entity.getValue())); + assertEquals(legalBasis, entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals(section, entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString())); + + ManualImageRecategorization imageRecategorizationRequest = ManualImageRecategorization.builder() + .type("type") + .requestDate(start.plusSeconds(5)) + .annotationId(annotationId) + .status(AnnotationStatus.APPROVED) + .build(); + entity.getManualOverwrite().addChange(imageRecategorizationRequest); + assertTrue(entity.getManualOverwrite().getRecategorized().isPresent()); + assertTrue(entity.getManualOverwrite().getRecategorized().get()); + assertEquals("type", entity.getManualOverwrite().getType().orElse(entity.getType())); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/realdata/AnalyseFileRealDataIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/realdata/AnalyseFileRealDataIntegrationTest.java index d4a96779..a3c539be 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/realdata/AnalyseFileRealDataIntegrationTest.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/realdata/AnalyseFileRealDataIntegrationTest.java @@ -5,9 +5,6 @@ import java.util.List; import java.util.Set; import org.junit.jupiter.api.Test; -import org.springframework.amqp.rabbit.core.RabbitTemplate; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.core.io.ClassPathResource; import com.fasterxml.jackson.databind.ObjectMapper; @@ -15,7 +12,6 @@ import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.MessageType; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; -import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -25,9 +21,6 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest public static final String FILE_NAME = "test-file"; - @Autowired - private AnnotationService annotationService; - @Test @SneakyThrows @@ -57,26 +50,15 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest .fileAttributes(List.of()) .build(); - try { - var text = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".text.json").getInputStream(); - var sectionText = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".section-grid.json").getInputStream(); - redactionStorageService.storeObject("dossierId", "fileId", FileType.TEXT, text); - redactionStorageService.storeObject("dossierId", "fileId", FileType.SECTION_GRID, sectionText); - } catch (Exception e) { - log.info("No text file provided, Performing Structure analysis"); - ar.setMessageType(MessageType.STRUCTURE_ANALYSE); - 
redactionMessageReceiver.receiveAnalyzeRequest(ar, false); - } - - try { - var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream(); - } catch (Exception e) { - log.info("No redaction log provided, Performing full analysis"); - - ar.setMessageType(MessageType.ANALYSE); - redactionMessageReceiver.receiveAnalyzeRequest(ar, false); - } +// try { +// var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream(); +// } catch (Exception e) { +// log.info("No redaction log provided, Performing full analysis"); +// +// ar.setMessageType(MessageType.ANALYSE); +// redactionMessageReceiver.receiveAnalyzeRequest(ar, false); +// } simulateIncrement(List.of("Desiree"), "PII", 3L); ar.setMessageType(MessageType.REANALYSE); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java index 12f2d2c2..2fb4cd37 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java @@ -22,10 +22,10 @@ import org.springframework.core.io.ClassPathResource; import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel; import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import 
com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService; @@ -66,7 +66,7 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest { Document document = buildGraphNoImages(filePath); List entityRecognitionEntities = validateAndCombine(parseNerEntities(nerEntitiesFilePath), document); assertFalse(entityRecognitionEntities.isEmpty()); - assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.boundary().start() < entity.boundary().end())); + assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.textRange().start() < entity.textRange().end())); ClassPathResource resource = new ClassPathResource(filePath); try (PDDocument pdDocument = Loader.loadPDF(resource.getInputStream())) { @@ -75,8 +75,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest { .getNerEntityList() .stream() .filter(e -> !e.type().equals("CBI_author")); - List redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts) - .map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document)) + List redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts) + .map(e -> entityCreationService.byBoundary(e.textRange(), e.type(), EntityType.ENTITY, document)) .filter(Optional::isPresent) .map(Optional::get) .toList(); @@ -107,23 +107,23 
@@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest { log.info("Parsed NerEntitiesModel"); NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document); log.info("Validated and mapped"); - List nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList(); + List nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList(); log.info("Combined to CBI_address"); - List cbiAddressEntities = nerEntityBoundaries.stream() + List cbiAddressEntities = nerEntityBoundaries.stream() .map(b -> entityCreationService.byBoundary(b, "CBI_address", EntityType.RECOMMENDATION, document)) .filter(Optional::isPresent) .map(Optional::get) .toList(); assertFalse(cbiAddressEntities.isEmpty()); - assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getBoundary().start() < entity.getBoundary().end())); + assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getTextRange().start() < entity.getTextRange().end())); ClassPathResource resource = new ClassPathResource(filePath); try (PDDocument pdDocument = Loader.loadPDF(resource.getInputStream())) { - List validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document) + List validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document) .getNerEntityList() .stream() - .map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document)) + .map(e -> entityCreationService.byBoundary(e.textRange(), e.type(), EntityType.ENTITY, document)) .filter(Optional::isPresent) .map(Optional::get) .toList(); @@ -153,24 +153,24 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest { } - private List getPositionsFromEntities(Stream entities) { + private List getPositionsFromEntities(Stream entities) { - return entities.map(RedactionEntity::getRedactionPositionsPerPage) + return 
entities.map(TextEntity::getPositionsOnPagePerPage) .flatMap(Collection::stream) - .map(RedactionPosition::getRectanglePerLine) + .map(PositionOnPage::getRectanglePerLine) .flatMap(Collection::stream) .toList(); } - private List getPositionsFromEntityOfType(String type, List entities) { + private List getPositionsFromEntityOfType(String type, List entities) { return getPositionsFromEntities(entities.stream().filter(e -> e.getType().equals(type))); } - private List getPositionsFromEntityNotOfType(List types, List entities) { + private List getPositionsFromEntityNotOfType(List types, List entities) { return getPositionsFromEntities(entities.stream().filter(e -> types.stream().noneMatch(type -> e.getType().equals(type)))); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionServiceTest.java index d6fa2bd6..12f27f52 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionServiceTest.java @@ -43,6 +43,19 @@ class DroolsExecutionServiceTest { assertTrue(droolsSyntaxValidation.isCompiled()); } + @Test + @SneakyThrows + void testAllRules() { + + DroolsExecutionService droolsExecutionService = new DroolsExecutionService(rulesClient, entityEnrichmentService, new DroolsSyntaxValidationFactory()); + var rulesFile = new ClassPathResource("drools/all_rules.drl"); + + String rulesString = new String(rulesFile.getInputStream().readAllBytes()); + + DroolsSyntaxValidation droolsSyntaxValidation = droolsExecutionService.testRules(rulesString); + 
assertTrue(droolsSyntaxValidation.isCompiled()); + } + @Test @SneakyThrows diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 5fecf596..39348dfd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import 
com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; global Document document global EntityCreationService entityCreationService -global ManualRedactionApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary //------------------------------------ queries ------------------------------------ @@ -77,7 +78,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + $entity: TextEntity(type == "CBI_author", dictionaryEntry) then $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -85,7 +86,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + $entity: TextEntity(type == "CBI_author", dictionaryEntry) then $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -95,7 +96,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" rule "CBI.1.0: Don't redact CBI 
Address (Non Vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + $entity: TextEntity(type == "CBI_address", dictionaryEntry) then $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); end @@ -103,7 +104,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + $entity: TextEntity(type == "CBI_address", dictionaryEntry) then $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -112,9 +113,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" // Rule unit: CBI.2 rule "CBI.2.0: Don't redact genitive CBI_author" when - $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied()) + $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied()) then - entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document) + entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document) .ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found")); end @@ -299,7 +300,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC rule "PII.0.0: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -307,7 +308,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" rule "PII.0.1: Redact all PII (vertebrate 
study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -453,9 +454,10 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") - $dossierRedaction: RedactionEntity(type == "dossier_redaction") + $dossierRedaction: TextEntity(type == "dossier_redaction") then $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); + update($dossierRedaction); $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); end @@ -489,10 +491,10 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" rule "MAN.0.0: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) - $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) then - manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); update($entityToBeResized); $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); @@ -503,11 +505,10 @@ rule "MAN.0.0: Apply manual resize redaction" rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == 
AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) then - $entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction"); + $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($entityToBeRemoved); retract($idRemoval); $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); @@ -516,11 +517,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) $imageEntityToBeRemoved: Image($id == id) then - $imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction"); + $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($imageEntityToBeRemoved); retract($idRemoval); update($imageEntityToBeRemoved.getParent()); @@ -532,29 +532,27 @@ rule "MAN.2.0: Apply force redaction" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) - $entityToForce: RedactionEntity(matchesAnnotationId($id)) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToForce: TextEntity(matchesAnnotationId($id)) then - $entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $entityToForce.setRemoved(false); - $entityToForce.setIgnored(false); - $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + 
$entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); end // Rule unit: MAN.3 -rule "MAN.3.0: Apply image recategorization" +rule "MAN.3.0: Apply entity recategorization" salience 128 when - $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) - $imageToBeRecategorized: Image($id == id) + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) then - $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); - update($imageToBeRecategorized); - update($imageToBeRecategorized.getParent()); + $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); + manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); retract($recategorization); + // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
+ retract($entityToBeRecategorized); end @@ -564,8 +562,8 @@ rule "MAN.3.0: Apply image recategorization" rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when - $larger: RedactionEntity($type: type, $entityType: entityType, isActive()) - $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $larger: TextEntity($type: type, $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -576,10 +574,10 @@ rule "X.0.0: remove Entity contained by Entity of same type" rule "X.1.0: merge intersecting Entities of same type" salience 64 when - $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) - $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) + $second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active()) then - RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); + TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); $first.remove("X.1.0", "merge intersecting Entities of same type"); $second.remove("X.1.0", "merge intersecting Entities of same type"); retract($first); @@ -592,8 +590,8 @@ rule "X.1.0: merge intersecting Entities of same type" rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when - $falsePositive: 
RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive()) - $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -605,8 +603,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when - $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive()) - $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -617,8 +615,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when - $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: 
TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); @@ -630,8 +628,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when - $entity: RedactionEntity(entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: TextEntity(entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); retract($recommendation); @@ -642,8 +640,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" salience 32 when - $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); $lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type 
ENTITY"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl index 74b1c7f3..0108d8d3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl @@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import 
com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; global Document document global EntityCreationService entityCreationService -global ManualRedactionApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary //------------------------------------ queries ------------------------------------ @@ -90,7 +91,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + $entity: TextEntity(type == "CBI_author", dictionaryEntry) then $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -98,7 +99,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + $entity: TextEntity(type == "CBI_author", dictionaryEntry) then $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -108,7 +109,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" when not FileAttribute(label == "Vertebrate 
Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + $entity: TextEntity(type == "CBI_address", dictionaryEntry) then $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); end @@ -116,7 +117,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + $entity: TextEntity(type == "CBI_address", dictionaryEntry) then $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -125,9 +126,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" // Rule unit: CBI.2 rule "CBI.2.0: Don't redact genitive CBI_author" when - $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied()) + $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied()) then - entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document) + entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document) .ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found")); end @@ -474,7 +475,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert rule "CBI.13.0: Ignore CBI Address Recommendations" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION) + $entity: TextEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION) then $entity.ignore("CBI.13.0", "Ignore CBI Address Recommendations"); retract($entity) @@ -484,7 +485,7 @@ rule "CBI.13.0: Ignore CBI Address Recommendations" // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded 
by \"batches produced at\"" when - $sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) + $sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); end @@ -587,7 +588,7 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with rule "CBI.18.0: Expand CBI_author entities with firstname initials" no-loop true when - $entityToExpand: RedactionEntity(type == "CBI_author", + $entityToExpand: TextEntity(type == "CBI_author", value.matches("[^\\s]+"), textAfter.startsWith(" "), anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") @@ -595,7 +596,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" then entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") .ifPresent(expandedEntity -> { - expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); $entityToExpand.remove("CBI.18.0", "Expand CBI_author entities with firstname initials"); retract($entityToExpand); }); @@ -605,11 +606,11 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" // Rule unit: CBI.19 rule "CBI.19.0: Expand CBI_author entities with salutation prefix" when - $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + $entityToExpand: TextEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") .ifPresent(expandedEntity -> { - expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()); + 
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); $entityToExpand.remove("CBI.19.0", "Expand CBI_author entities with salutation prefix"); retract($entityToExpand); }); @@ -650,7 +651,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC rule "PII.0.0: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -658,7 +659,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" rule "PII.0.1: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -991,10 +992,10 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" // Rule unit: PII.12 rule "PII.12.0: Expand PII entities with salutation prefix" when - $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + $entityToExpand: TextEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") - .ifPresent(expandedEntity -> expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList())); + .ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList())); end @@ -1059,7 +1060,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" when - $dossierRedaction: RedactionEntity(type == 
"dossier_redaction") + $dossierRedaction: TextEntity(type == "dossier_redaction") then $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -1069,7 +1070,7 @@ rule "ETC.4.0: Redact dossier dictionary entries" rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") - $dossierRedaction: RedactionEntity(type == "dossier_redaction") + $dossierRedaction: TextEntity(type == "dossier_redaction") then $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); update($dossierRedaction); @@ -1161,10 +1162,10 @@ rule "AI.2.0: add all NER Entities of any type except CBI_author" rule "MAN.0.0: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) - $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) then - manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); update($entityToBeResized); $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); @@ -1173,10 +1174,10 @@ rule "MAN.0.0: Apply manual resize redaction" rule "MAN.0.1: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) $imageToBeResized: Image(id == $id) then - manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); retract($resizeRedaction); 
update($imageToBeResized); update($imageToBeResized.getParent()); @@ -1187,11 +1188,10 @@ rule "MAN.0.1: Apply manual resize redaction" rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) then - $entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction"); + $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($entityToBeRemoved); retract($idRemoval); $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); @@ -1200,11 +1200,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) $imageEntityToBeRemoved: Image($id == id) then - $imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction"); + $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($imageEntityToBeRemoved); retract($idRemoval); update($imageEntityToBeRemoved.getParent()); @@ -1216,13 +1215,10 @@ rule "MAN.2.0: Apply force redaction" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) - $entityToForce: 
RedactionEntity(matchesAnnotationId($id)) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToForce: TextEntity(matchesAnnotationId($id)) then - $entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $entityToForce.setRemoved(false); - $entityToForce.setIgnored(false); - $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + $entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); end @@ -1231,29 +1227,60 @@ rule "MAN.2.1: Apply force redaction to images" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) $imageToForce: Image(id == $id) then - $imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $imageToForce.setRemoved(false); - $imageToForce.setIgnored(false); + $imageToForce.getManualOverwrite().addChange($force); update($imageToForce); update($imageToForce.getParent()); end + // Rule unit: MAN.3 -rule "MAN.3.0: Apply image recategorization" +rule "MAN.3.0: Apply entity recategorization" salience 128 when - $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) + then + $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); + manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); + retract($recategorization); + // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
+ retract($entityToBeRecategorized); + end + +rule "MAN.3.1: Apply image recategorization" + salience 128 + when + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) $imageToBeRecategorized: Image($id == id) then - $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); + manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); update($imageToBeRecategorized); update($imageToBeRecategorized.getParent()); retract($recategorization); end +// Rule unit: MAN.4 +rule "MAN.4.0: Apply legal basis change" + salience 128 + when + $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); + end + +rule "MAN.4.1: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeChanged: TextEntity(matchesAnnotationId($id)) + then + $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); + end + //------------------------------------ Entity merging rules ------------------------------------ @@ -1261,8 +1288,8 @@ rule "MAN.3.0: Apply image recategorization" rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when - $larger: RedactionEntity($type: type, $entityType: entityType, isActive()) - $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $larger: TextEntity($type: type, $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1273,10 +1300,10 @@ rule 
"X.0.0: remove Entity contained by Entity of same type" rule "X.1.0: merge intersecting Entities of same type" salience 64 when - $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) - $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) + $second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active()) then - RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); + TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); $first.remove("X.1.0", "merge intersecting Entities of same type"); $second.remove("X.1.0", "merge intersecting Entities of same type"); retract($first); @@ -1289,8 +1316,8 @@ rule "X.1.0: merge intersecting Entities of same type" rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when - $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive()) - $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1302,8 +1329,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" rule "X.3.0: remove Entity of type RECOMMENDATION 
when contained by FALSE_RECOMMENDATION" salience 64 when - $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive()) - $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1314,8 +1341,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when - $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); @@ -1327,8 +1354,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when - $entity: RedactionEntity(entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, 
!skipRemoveEntitiesContainedInLarger, isActive()) + $entity: TextEntity(entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); retract($recommendation); @@ -1339,8 +1366,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" salience 32 when - $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); $lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 8ddf0a48..bdd925ef 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -34,22 +34,22 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; global Document document global EntityCreationService entityCreationService -global ManualRedactionApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualRedactionApplicationService global Dictionary dictionary //------------------------------------ queries ------------------------------------ @@ 
-397,7 +397,7 @@ rule "DOC.8.1: Performing Laboratory (Name)" $section: Section(containsString("PERFORMING LABORATORY:")) then nerEntities.streamEntitiesOfType("COUNTRY") - .filter(nerEntity -> $section.getBoundary().contains(nerEntity.boundary())) + .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) .forEach(entity -> { entity.apply("DOC.8.2", "Performing Laboratory found", "n-a"); @@ -1221,7 +1221,7 @@ rule "DOC.44.0: Results (Main Study)" FileAttribute(label == "OECD Number", value == "429") $section: Section( getHeadline().containsString("Results") - && getHeadline().getBoundary().length() < 20 + && getHeadline().getTextRange().length() < 20 && !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table")) ) then @@ -1262,7 +1262,7 @@ rule "MAN.0.0: Apply manual resize redaction" salience 128 when $resizeRedaction: ManualResizeRedaction($id: annotationId) - $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) then manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); @@ -1276,7 +1276,7 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to when IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) then $entityToBeRemoved.removeFromGraph(); retract($entityToBeRemoved); @@ -1298,7 +1298,7 @@ rule "MAN.2.0: Apply force redaction" salience 128 when $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, 
$legalBasis: legalBasis) - $entityToForce: RedactionEntity(matchesAnnotationId($id)) + $entityToForce: TextEntity(matchesAnnotationId($id)) then $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); $entityToForce.setRemoved(false); @@ -1328,8 +1328,8 @@ rule "MAN.3.0: Apply image recategorization" rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when - $larger: RedactionEntity($type: type, $entityType: entityType, isActive()) - $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $larger: TextEntity($type: type, $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 51b5b098..c748f8ae 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; global Document document global EntityCreationService entityCreationService -global ManualRedactionApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary //------------------------------------ queries ------------------------------------ @@ -58,14 
+59,16 @@ query "getFileAttributes" $fileAttribute: FileAttribute() end +//------------------------------------ Manual redaction rules ------------------------------------ + // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) - $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) then - manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); update($entityToBeResized); $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); @@ -74,10 +77,10 @@ rule "MAN.0.0: Apply manual resize redaction" rule "MAN.0.1: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) $imageToBeResized: Image(id == $id) then - manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); retract($resizeRedaction); update($imageToBeResized); update($imageToBeResized.getParent()); @@ -88,11 +91,10 @@ rule "MAN.0.1: Apply manual resize redaction" rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + $idRemoval: IdRemoval($id: annotationId, status == 
AnnotationStatus.APPROVED) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) then - $entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction"); + $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($entityToBeRemoved); retract($idRemoval); $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); @@ -101,11 +103,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) $imageEntityToBeRemoved: Image($id == id) then - $imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction"); + $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($imageEntityToBeRemoved); retract($idRemoval); update($imageEntityToBeRemoved.getParent()); @@ -117,13 +118,10 @@ rule "MAN.2.0: Apply force redaction" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) - $entityToForce: RedactionEntity(matchesAnnotationId($id)) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToForce: TextEntity(matchesAnnotationId($id)) then - $entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $entityToForce.setRemoved(false); - $entityToForce.setIgnored(false); - $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + $entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); end @@ -132,29 +130,61 @@ rule "MAN.2.1: Apply force 
redaction to images" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) $imageToForce: Image(id == $id) then - $imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $imageToForce.setRemoved(false); - $imageToForce.setIgnored(false); + $imageToForce.getManualOverwrite().addChange($force); update($imageToForce); update($imageToForce.getParent()); end + // Rule unit: MAN.3 -rule "MAN.3.0: Apply image recategorization" +rule "MAN.3.0: Apply entity recategorization" salience 128 when - $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) + then + manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); + $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); + retract($recategorization); + // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
+ retract($entityToBeRecategorized); + end + +rule "MAN.3.1: Apply image recategorization" + salience 128 + when + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) $imageToBeRecategorized: Image($id == id) then - $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); + manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); update($imageToBeRecategorized); update($imageToBeRecategorized.getParent()); retract($recategorization); end +// Rule unit: MAN.4 +rule "MAN.4.0: Apply legal basis change" + salience 128 + when + $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); + end + +rule "MAN.4.1: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeChanged: TextEntity(matchesAnnotationId($id)) + then + $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); + end + + //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 04c90499..7174041b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; global Document document global EntityCreationService entityCreationService -global 
ManualRedactionApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary //------------------------------------ queries ------------------------------------ @@ -315,7 +316,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when - $sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) + $sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); end @@ -418,7 +419,7 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with rule "CBI.18.0: Expand CBI_author entities with firstname initials" no-loop true when - $entityToExpand: RedactionEntity(type == "CBI_author", + $entityToExpand: TextEntity(type == "CBI_author", value.matches("[^\\s]+"), textAfter.startsWith(" "), anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") @@ -426,7 +427,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" then entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? 
[A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") .ifPresent(expandedEntity -> { - expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); $entityToExpand.remove("CBI.18.0", "Expand CBI_author entities with firstname initials"); retract($entityToExpand); }); @@ -436,11 +437,11 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" // Rule unit: CBI.19 rule "CBI.19.0: Expand CBI_author entities with salutation prefix" when - $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + $entityToExpand: TextEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") .ifPresent(expandedEntity -> { - expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); $entityToExpand.remove("CBI.19.0", "Expand CBI_author entities with salutation prefix"); retract($entityToExpand); }); @@ -481,7 +482,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC rule "PII.0.0: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -489,7 +490,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" rule "PII.0.1: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.1", "Personal 
Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -726,10 +727,10 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" // Rule unit: PII.12 rule "PII.12.0: Expand PII entities with salutation prefix" when - $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + $entityToExpand: TextEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") - .ifPresent(expandedEntity -> expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList())); + .ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList())); end @@ -784,7 +785,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" when - $dossierRedaction: RedactionEntity(type == "dossier_redaction") + $dossierRedaction: TextEntity(type == "dossier_redaction") then $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -794,7 +795,7 @@ rule "ETC.4.0: Redact dossier dictionary entries" rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") - $dossierRedaction: RedactionEntity(type == "dossier_redaction") + $dossierRedaction: TextEntity(type == "dossier_redaction") then $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); update($dossierRedaction); @@ -874,10 +875,10 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" rule "MAN.0.0: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) - $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + $resizeRedaction: 
ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) then - manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); update($entityToBeResized); $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); @@ -886,10 +887,10 @@ rule "MAN.0.0: Apply manual resize redaction" rule "MAN.0.1: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) $imageToBeResized: Image(id == $id) then - manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); retract($resizeRedaction); update($imageToBeResized); update($imageToBeResized.getParent()); @@ -900,11 +901,10 @@ rule "MAN.0.1: Apply manual resize redaction" rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) then - $entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction"); + $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($entityToBeRemoved); retract($idRemoval); $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); @@ -913,11 +913,10 @@ rule "MAN.1.0: Apply id removals that are valid 
and not in forced redactions to rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) $imageEntityToBeRemoved: Image($id == id) then - $imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction"); + $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($imageEntityToBeRemoved); retract($idRemoval); update($imageEntityToBeRemoved.getParent()); @@ -929,13 +928,10 @@ rule "MAN.2.0: Apply force redaction" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) - $entityToForce: RedactionEntity(matchesAnnotationId($id)) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToForce: TextEntity(matchesAnnotationId($id)) then - $entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $entityToForce.setRemoved(false); - $entityToForce.setIgnored(false); - $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + $entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); end @@ -944,39 +940,70 @@ rule "MAN.2.1: Apply force redaction to images" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) $imageToForce: Image(id == $id) then - $imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $imageToForce.setRemoved(false); - 
$imageToForce.setIgnored(false); + $imageToForce.getManualOverwrite().addChange($force); update($imageToForce); update($imageToForce.getParent()); end // Rule unit: MAN.3 -rule "MAN.3.0: Apply image recategorization" +rule "MAN.3.0: Apply entity recategorization" salience 128 when - $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) + then + $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); + manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); + retract($recategorization); + // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. + retract($entityToBeRecategorized); + end + +rule "MAN.3.1: Apply image recategorization" + salience 128 + when + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) $imageToBeRecategorized: Image($id == id) then - $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); + manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); update($imageToBeRecategorized); update($imageToBeRecategorized.getParent()); retract($recategorization); end +// Rule unit: MAN.4 +rule "MAN.4.0: Apply legal basis change" + salience 128 + when + $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); + end + +rule "MAN.4.1: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeChanged: TextEntity(matchesAnnotationId($id)) + then + 
$entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); + end + + //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when - $larger: RedactionEntity($type: type, $entityType: entityType, isActive()) - $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $larger: TextEntity($type: type, $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -987,10 +1014,10 @@ rule "X.0.0: remove Entity contained by Entity of same type" rule "X.1.0: merge intersecting Entities of same type" salience 64 when - $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) - $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) + $second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active()) then - RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); + TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); $first.remove("X.1.0", "merge intersecting Entities of same type"); $second.remove("X.1.0", "merge intersecting Entities of same type"); retract($first); @@ -1003,8 +1030,8 @@ rule "X.1.0: merge intersecting Entities of same type" rule "X.2.0: remove 
Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when - $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive()) - $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1016,8 +1043,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when - $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive()) - $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1028,8 +1055,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when - $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == 
EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); @@ -1041,8 +1068,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when - $entity: RedactionEntity(entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: TextEntity(entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); retract($recommendation); @@ -1053,8 +1080,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" salience 32 when - $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active()) then $lowerRank.getIntersectingNodes().forEach(node -> 
update(node)); $lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index 453f5caf..ce547025 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -34,22 +34,22 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType import 
com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; global Document document global EntityCreationService entityCreationService -global ManualRedactionApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary // --------------------------------------- queries ------------------------------------------------------------------- @@ -64,7 +64,7 @@ rule "add NER Entities of type CBI_author or CBI_address" when $nerEntity: EntityRecognitionEntity($type: type, (type == "CBI_author" || type == "CBI_address")) then - entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document) + entityCreationService.byBoundary(new TextRange($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document) .ifPresent(redactionEntity -> insert(redactionEntity)); end @@ -73,7 +73,7 @@ rule "add NER Entities of type CBI_author or CBI_address" rule "Always redact CBI_author" when - $cbiAuthor: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY) + $cbiAuthor: TextEntity(type == "CBI_author", entityType == EntityType.ENTITY) then $cbiAuthor.apply("CBI.0.0", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -83,7 +83,7 @@ rule "Always redact CBI_author" rule "Always redact PII" when - $cbiAuthor: RedactionEntity(type == "PII", entityType == EntityType.ENTITY) + $cbiAuthor: TextEntity(type == "PII", entityType == EntityType.ENTITY) then $cbiAuthor.apply("PII.0.0", "PII found", 
"Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -96,8 +96,8 @@ rule "Always redact PII" rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when - $larger: RedactionEntity($type: type, $entityType: entityType, isActive()) - $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $larger: TextEntity($type: type, $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -108,10 +108,10 @@ rule "X.0.0: remove Entity contained by Entity of same type" rule "X.1.0: merge intersecting Entities of same type" salience 64 when - $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) - $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) + $second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active()) then - RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); + TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); $first.remove("X.1.0", "merge intersecting Entities of same type"); $second.remove("X.1.0", "merge intersecting Entities of same type"); retract($first); @@ -124,8 +124,8 @@ rule "X.1.0: merge intersecting Entities of same type" rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when - $falsePositive: 
RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive()) - $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -137,8 +137,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when - $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive()) - $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -149,8 +149,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when - $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: 
TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); @@ -162,8 +162,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when - $entity: RedactionEntity(entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: TextEntity(entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); retract($recommendation); @@ -174,8 +174,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" salience 32 when - $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); $lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type 
ENTITY"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index d37e477a..ab9db674 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -33,22 +33,23 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; +import 
com.iqser.red.service.redaction.v1.server.document.graph.TextRange; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType -import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; global Document document global EntityCreationService entityCreationService -global ManualRedactionApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualChangesApplicationService global NerEntitiesAdapter nerEntitiesAdapter global Dictionary dictionary @@ -77,7 +78,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + $entity: TextEntity(type == "CBI_author", dictionaryEntry) then $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -85,7 +86,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + $entity: TextEntity(type == "CBI_author", dictionaryEntry) then $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -95,7 +96,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" when not 
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + $entity: TextEntity(type == "CBI_address", dictionaryEntry) then $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); end @@ -103,7 +104,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + $entity: TextEntity(type == "CBI_address", dictionaryEntry) then $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -112,9 +113,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" // Rule unit: CBI.2 rule "CBI.2.0: Don't redact genitive CBI_author" when - $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied()) + $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied()) then - entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document) + entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document) .ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found")); end @@ -299,7 +300,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC rule "PII.0.0: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -307,7 +308,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" rule "PII.0.1: Redact all PII (vertebrate study)" when FileAttribute(label == 
"Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", dictionaryEntry) + $pii: TextEntity(type == "PII", dictionaryEntry) then $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -453,7 +454,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") - $dossierRedaction: RedactionEntity(type == "dossier_redaction") + $dossierRedaction: TextEntity(type == "dossier_redaction") then $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); update($dossierRedaction); @@ -492,25 +493,36 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" rule "MAN.0.0: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) - $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) then - manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); retract($resizeRedaction); update($entityToBeResized); $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); end +rule "MAN.0.1: Apply manual resize redaction" + salience 128 + when + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToBeResized: Image(id == $id) + then + manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); + retract($resizeRedaction); + update($imageToBeResized); + update($imageToBeResized.getParent()); + end + // Rule unit: MAN.1 rule "MAN.1.0: Apply id removals that are valid and not in 
forced redactions to Entity" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) then - $entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction"); + $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($entityToBeRemoved); retract($idRemoval); $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); @@ -519,11 +531,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when - $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) $imageEntityToBeRemoved: Image($id == id) then - $imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction"); + $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); update($imageEntityToBeRemoved); retract($idRemoval); update($imageEntityToBeRemoved.getParent()); @@ -535,31 +546,72 @@ rule "MAN.2.0: Apply force redaction" no-loop true salience 128 when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) - $entityToForce: RedactionEntity(matchesAnnotationId($id)) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToForce: TextEntity(matchesAnnotationId($id)) then - 
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis); - $entityToForce.setRemoved(false); - $entityToForce.setIgnored(false); - $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + $entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); end - -// Rule unit: MAN.3 -rule "MAN.3.0: Apply image recategorization" +rule "MAN.2.1: Apply force redaction to images" + no-loop true salience 128 when - $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToForce: Image(id == $id) + then + $imageToForce.getManualOverwrite().addChange($force); + update($imageToForce); + update($imageToForce.getParent()); + end + + +// Rule unit: MAN.3 +rule "MAN.3.0: Apply entity recategorization" + salience 128 + when + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) + then + $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); + manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); + retract($recategorization); + // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
+ retract($entityToBeRecategorized); + end + +rule "MAN.3.1: Apply image recategorization" + salience 128 + when + $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED) $imageToBeRecategorized: Image($id == id) then - $imageToBeRecategorized.setImageType(ImageType.fromString($imageType)); + manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); update($imageToBeRecategorized); update($imageToBeRecategorized.getParent()); retract($recategorization); end +// Rule unit: MAN.4 +rule "MAN.4.0: Apply legal basis change" + salience 128 + when + $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); + end + +rule "MAN.4.1: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeChanged: TextEntity(matchesAnnotationId($id)) + then + $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); + end + //------------------------------------ Entity merging rules ------------------------------------ @@ -567,8 +619,8 @@ rule "MAN.3.0: Apply image recategorization" rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when - $larger: RedactionEntity($type: type, $entityType: entityType, isActive()) - $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $larger: TextEntity($type: type, $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -579,10 +631,10 @@ rule "X.0.0: 
remove Entity contained by Entity of same type" rule "X.1.0: merge intersecting Entities of same type" salience 64 when - $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) - $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) + $second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active()) then - RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); + TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); $first.remove("X.1.0", "merge intersecting Entities of same type"); $second.remove("X.1.0", "merge intersecting Entities of same type"); retract($first); @@ -595,8 +647,8 @@ rule "X.1.0: merge intersecting Entities of same type" rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when - $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive()) - $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -608,8 +660,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" rule "X.3.0: remove Entity of type RECOMMENDATION when 
contained by FALSE_RECOMMENDATION" salience 64 when - $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive()) - $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -620,8 +672,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when - $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $entity: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); @@ -633,8 +685,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when - $entity: RedactionEntity(entityType == EntityType.ENTITY, isActive()) - $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, 
isActive()) + $entity: TextEntity(entityType == EntityType.ENTITY, active()) + $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); retract($recommendation); @@ -645,8 +697,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" salience 32 when - $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive()) - $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive()) + $higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); $lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");