Merge branch 'RED-7317' into 'master'
RED-7317: fix behavior of recategorize. Closes RED-7317. See merge request redactmanager/redaction-service!113.
This commit is contained in:
commit
141c64cde3
@ -16,6 +16,7 @@ val layoutParserVersion = "0.25.0"
|
||||
val jacksonVersion = "2.15.2"
|
||||
val droolsVersion = "8.43.0.Final"
|
||||
val pdfBoxVersion = "3.0.0-alpha2"
|
||||
val persistenceServiceVersion = "2.155.0"
|
||||
|
||||
configurations {
|
||||
all {
|
||||
@ -26,7 +27,7 @@ configurations {
|
||||
dependencies {
|
||||
|
||||
implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") }
|
||||
implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.119.0") { exclude(group = "org.springframework.boot") }
|
||||
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") }
|
||||
implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}")
|
||||
|
||||
implementation("com.iqser.red.commons:spring-commons:2.7.0")
|
||||
|
||||
@ -11,17 +11,17 @@ import java.util.stream.Collector;
|
||||
|
||||
import com.google.common.base.Functions;
|
||||
|
||||
public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Boundary>, List<Boundary>> {
|
||||
public class ConsecutiveBoundaryCollector implements Collector<TextRange, List<TextRange>, List<TextRange>> {
|
||||
|
||||
@Override
|
||||
public Supplier<List<Boundary>> supplier() {
|
||||
public Supplier<List<TextRange>> supplier() {
|
||||
|
||||
return LinkedList::new;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BiConsumer<List<Boundary>, Boundary> accumulator() {
|
||||
public BiConsumer<List<TextRange>, TextRange> accumulator() {
|
||||
|
||||
return (existingList, boundary) -> {
|
||||
if (existingList.isEmpty()) {
|
||||
@ -29,14 +29,14 @@ public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Bo
|
||||
return;
|
||||
}
|
||||
|
||||
Boundary prevBoundary = existingList.get(existingList.size() - 1);
|
||||
if (prevBoundary.end() > boundary.start()) {
|
||||
throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", prevBoundary, boundary));
|
||||
TextRange prevTextRange = existingList.get(existingList.size() - 1);
|
||||
if (prevTextRange.end() > boundary.start()) {
|
||||
throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", prevTextRange, boundary));
|
||||
}
|
||||
|
||||
if (prevBoundary.end() == boundary.start()) {
|
||||
if (prevTextRange.end() == boundary.start()) {
|
||||
existingList.remove(existingList.size() - 1);
|
||||
existingList.add(Boundary.merge(List.of(prevBoundary, boundary)));
|
||||
existingList.add(TextRange.merge(List.of(prevTextRange, boundary)));
|
||||
} else {
|
||||
existingList.add(boundary);
|
||||
}
|
||||
@ -45,7 +45,7 @@ public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Bo
|
||||
|
||||
|
||||
@Override
|
||||
public BinaryOperator<List<Boundary>> combiner() {
|
||||
public BinaryOperator<List<TextRange>> combiner() {
|
||||
|
||||
return (list1, list2) -> {
|
||||
list1.addAll(list2);
|
||||
@ -55,7 +55,7 @@ public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Bo
|
||||
|
||||
|
||||
@Override
|
||||
public Function<List<Boundary>, List<Boundary>> finisher() {
|
||||
public Function<List<TextRange>, List<TextRange>> finisher() {
|
||||
|
||||
return Functions.identity();
|
||||
}
|
||||
|
||||
@ -13,13 +13,13 @@ import lombok.Setter;
|
||||
|
||||
@Setter
|
||||
@EqualsAndHashCode
|
||||
public class Boundary implements Comparable<Boundary> {
|
||||
public class TextRange implements Comparable<TextRange> {
|
||||
|
||||
private int start;
|
||||
private int end;
|
||||
|
||||
|
||||
public Boundary(int start, int end) {
|
||||
public TextRange(int start, int end) {
|
||||
|
||||
if (start > end) {
|
||||
throw new IllegalArgumentException(format("start: %d > end: %d", start, end));
|
||||
@ -47,15 +47,15 @@ public class Boundary implements Comparable<Boundary> {
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(Boundary boundary) {
|
||||
public boolean contains(TextRange textRange) {
|
||||
|
||||
return start <= boundary.start() && boundary.end() <= end;
|
||||
return start <= textRange.start() && textRange.end() <= end;
|
||||
}
|
||||
|
||||
|
||||
public boolean containedBy(Boundary boundary) {
|
||||
public boolean containedBy(TextRange textRange) {
|
||||
|
||||
return boundary.contains(this);
|
||||
return textRange.contains(this);
|
||||
}
|
||||
|
||||
|
||||
@ -83,18 +83,18 @@ public class Boundary implements Comparable<Boundary> {
|
||||
}
|
||||
|
||||
|
||||
public boolean intersects(Boundary boundary) {
|
||||
public boolean intersects(TextRange textRange) {
|
||||
|
||||
return boundary.start() < this.end && this.start < boundary.end();
|
||||
return textRange.start() < this.end && this.start < textRange.end();
|
||||
}
|
||||
|
||||
|
||||
public List<Boundary> split(List<Integer> splitIndices) {
|
||||
public List<TextRange> split(List<Integer> splitIndices) {
|
||||
|
||||
if (splitIndices.stream().anyMatch(idx -> !this.contains(idx))) {
|
||||
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s", splitIndices.stream().filter(idx -> !this.contains(idx)).toList(), this));
|
||||
}
|
||||
List<Boundary> splitBoundaries = new LinkedList<>();
|
||||
List<TextRange> splitBoundaries = new LinkedList<>();
|
||||
int previousIndex = start;
|
||||
for (int splitIndex : splitIndices) {
|
||||
|
||||
@ -102,19 +102,19 @@ public class Boundary implements Comparable<Boundary> {
|
||||
if (splitIndex == previousIndex) {
|
||||
continue;
|
||||
}
|
||||
splitBoundaries.add(new Boundary(previousIndex, splitIndex));
|
||||
splitBoundaries.add(new TextRange(previousIndex, splitIndex));
|
||||
previousIndex = splitIndex;
|
||||
}
|
||||
splitBoundaries.add(new Boundary(previousIndex, end));
|
||||
splitBoundaries.add(new TextRange(previousIndex, end));
|
||||
return splitBoundaries;
|
||||
}
|
||||
|
||||
|
||||
public static Boundary merge(Collection<Boundary> boundaries) {
|
||||
public static TextRange merge(Collection<TextRange> boundaries) {
|
||||
|
||||
int minStart = boundaries.stream().mapToInt(Boundary::start).min().orElseThrow(IllegalArgumentException::new);
|
||||
int maxEnd = boundaries.stream().mapToInt(Boundary::end).max().orElseThrow(IllegalArgumentException::new);
|
||||
return new Boundary(minStart, maxEnd);
|
||||
int minStart = boundaries.stream().mapToInt(TextRange::start).min().orElseThrow(IllegalArgumentException::new);
|
||||
int maxEnd = boundaries.stream().mapToInt(TextRange::end).max().orElseThrow(IllegalArgumentException::new);
|
||||
return new TextRange(minStart, maxEnd);
|
||||
}
|
||||
|
||||
|
||||
@ -126,12 +126,12 @@ public class Boundary implements Comparable<Boundary> {
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(Boundary boundary) {
|
||||
public int compareTo(TextRange textRange) {
|
||||
|
||||
if (end < boundary.end() && start < boundary.start()) {
|
||||
if (end < textRange.end() && start < textRange.start()) {
|
||||
return -1;
|
||||
}
|
||||
if (start > boundary.start() && end > boundary.end()) {
|
||||
if (start > textRange.start() && end > textRange.end()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -145,7 +145,7 @@ public class Boundary implements Comparable<Boundary> {
|
||||
* @param textBlock TextBlock to check whitespaces against
|
||||
* @return trimmed boundary
|
||||
*/
|
||||
public Boundary trim(TextBlock textBlock) {
|
||||
public TextRange trim(TextBlock textBlock) {
|
||||
|
||||
if (this.length() == 0) {
|
||||
return this;
|
||||
@ -160,7 +160,7 @@ public class Boundary implements Comparable<Boundary> {
|
||||
trimmedEnd--;
|
||||
}
|
||||
|
||||
return new Boundary(trimmedStart, Math.max(trimmedEnd, trimmedStart));
|
||||
return new TextRange(trimmedStart, Math.max(trimmedEnd, trimmedStart));
|
||||
}
|
||||
|
||||
}
|
||||
@ -7,38 +7,54 @@ import java.util.Set;
|
||||
|
||||
import lombok.NonNull;
|
||||
|
||||
public interface MatchedRuleHolder {
|
||||
public interface Entity {
|
||||
|
||||
PriorityQueue<MatchedRule> getMatchedRuleList();
|
||||
|
||||
|
||||
boolean isIgnored();
|
||||
ManualChangeOverwrite getManualOverwrite();
|
||||
|
||||
|
||||
boolean isRemoved();
|
||||
// Don't use default accessor pattern (e.g. isIgnored()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
|
||||
default boolean ignored() {
|
||||
|
||||
|
||||
void setIgnored(boolean ignored);
|
||||
|
||||
|
||||
void setRemoved(boolean ignored);
|
||||
|
||||
|
||||
default boolean isApplied() {
|
||||
|
||||
return getMatchedRule().isApplied();
|
||||
return getManualOverwrite().getIgnored().orElse(getMatchedRule().isIgnored());
|
||||
}
|
||||
|
||||
|
||||
default Set<RedactionEntity> getReferences() {
|
||||
default boolean removed() {
|
||||
|
||||
return getManualOverwrite().getRemoved().orElse(getMatchedRule().isRemoved());
|
||||
}
|
||||
|
||||
|
||||
default boolean resized() {
|
||||
|
||||
return getManualOverwrite().getResized().orElse(false);
|
||||
}
|
||||
|
||||
|
||||
default boolean applied() {
|
||||
|
||||
return getManualOverwrite().getApplied().orElse(getMatchedRule().isApplied());
|
||||
}
|
||||
|
||||
|
||||
default boolean hasManualChanges() {
|
||||
|
||||
return !getManualOverwrite().getManualChangeLog().isEmpty();
|
||||
}
|
||||
|
||||
|
||||
default Set<TextEntity> references() {
|
||||
|
||||
return getMatchedRule().getReferences();
|
||||
}
|
||||
|
||||
|
||||
default boolean isActive() {
|
||||
default boolean active() {
|
||||
|
||||
return !(isRemoved() || isIgnored());
|
||||
return !(removed() || ignored());
|
||||
}
|
||||
|
||||
|
||||
@ -82,15 +98,13 @@ public interface MatchedRuleHolder {
|
||||
|
||||
default void remove(String ruleIdentifier, String reason) {
|
||||
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build());
|
||||
setRemoved(true);
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).removed(true).build());
|
||||
}
|
||||
|
||||
|
||||
default void ignore(String ruleIdentifier, String reason) {
|
||||
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build());
|
||||
setIgnored(true);
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).ignored(true).build());
|
||||
}
|
||||
|
||||
|
||||
@ -121,7 +135,7 @@ public interface MatchedRuleHolder {
|
||||
}
|
||||
|
||||
|
||||
default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection<RedactionEntity> references) {
|
||||
default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection<TextEntity> references) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
@ -136,7 +150,7 @@ public interface MatchedRuleHolder {
|
||||
}
|
||||
|
||||
|
||||
default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection<RedactionEntity> references) {
|
||||
default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection<TextEntity> references) {
|
||||
|
||||
getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).references(new HashSet<>(references)).build());
|
||||
}
|
||||
@ -150,6 +164,9 @@ public interface MatchedRuleHolder {
|
||||
|
||||
default void addMatchedRules(Collection<MatchedRule> matchedRules) {
|
||||
|
||||
if (getMatchedRuleList().equals(matchedRules)) {
|
||||
return;
|
||||
}
|
||||
getMatchedRuleList().addAll(matchedRules);
|
||||
}
|
||||
|
||||
@ -168,4 +185,22 @@ public interface MatchedRuleHolder {
|
||||
return getMatchedRuleList().peek();
|
||||
}
|
||||
|
||||
|
||||
default String buildReasonWithManualChangeDescriptions() {
|
||||
|
||||
if (getManualOverwrite().getDescriptions().isEmpty()) {
|
||||
return getMatchedRule().getReason();
|
||||
}
|
||||
if (getMatchedRule().getReason().isEmpty()) {
|
||||
return String.join(", ", getManualOverwrite().getDescriptions());
|
||||
}
|
||||
return getMatchedRule().getReason() + ", " + String.join(", ", getManualOverwrite().getDescriptions());
|
||||
}
|
||||
|
||||
|
||||
default String legalBasis() {
|
||||
|
||||
return getManualOverwrite().getLegalBasis().orElse(getMatchedRule().getLegalBasis());
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,200 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class ManualChangeOverwrite {
|
||||
|
||||
private static final Map<Class<? extends BaseAnnotation>, String> MANUAL_CHANGE_DESCRIPTIONS = Map.of(//
|
||||
ManualRedactionEntry.class, "created by manual change", //
|
||||
ManualLegalBasisChange.class, "legal basis was manually changed", //
|
||||
ManualResizeRedaction.class, "resized by manual override", //
|
||||
ManualForceRedaction.class, "forced by manual override", //
|
||||
IdRemoval.class, "removed by manual override", //
|
||||
ManualImageRecategorization.class, "recategorized by manual override");
|
||||
|
||||
List<BaseAnnotation> manualChanges = new LinkedList<>();
|
||||
boolean changed;
|
||||
List<String> descriptions;
|
||||
String type;
|
||||
String legalBasis;
|
||||
String section;
|
||||
String value;
|
||||
Boolean applied;
|
||||
Boolean removed;
|
||||
Boolean ignored;
|
||||
Boolean resized;
|
||||
Boolean recategorized;
|
||||
|
||||
|
||||
public void calculateCurrentOverride() {
|
||||
|
||||
if (!changed) {
|
||||
return;
|
||||
}
|
||||
List<BaseAnnotation> sortedManualChanges = getManualChangeLog();
|
||||
updateFields(sortedManualChanges);
|
||||
}
|
||||
|
||||
|
||||
public List<BaseAnnotation> getManualChangeLog() {
|
||||
|
||||
if (!changed) {
|
||||
return manualChanges;
|
||||
}
|
||||
manualChanges.sort(Comparator.comparing(BaseAnnotation::getRequestDate));
|
||||
updateFields(manualChanges);
|
||||
// make list unmodifiable.
|
||||
return manualChanges.stream().toList();
|
||||
}
|
||||
|
||||
|
||||
private void updateFields(List<BaseAnnotation> sortedManualChanges) {
|
||||
|
||||
descriptions = new LinkedList<>();
|
||||
|
||||
for (BaseAnnotation manualChange : sortedManualChanges) {
|
||||
// ManualRedactionEntries are created prior to rule execution in analysis service.
|
||||
|
||||
if (manualChange instanceof IdRemoval) {
|
||||
applied = false;
|
||||
ignored = true;
|
||||
}
|
||||
|
||||
if (manualChange instanceof ManualForceRedaction manualForceRedaction) {
|
||||
removed = false;
|
||||
ignored = false;
|
||||
applied = true;
|
||||
legalBasis = manualForceRedaction.getLegalBasis();
|
||||
}
|
||||
|
||||
if (manualChange instanceof ManualLegalBasisChange manualLegalBasisChange) {
|
||||
section = manualLegalBasisChange.getSection();
|
||||
legalBasis = manualLegalBasisChange.getLegalBasis();
|
||||
value = manualLegalBasisChange.getValue();
|
||||
}
|
||||
|
||||
if (manualChange instanceof ManualResizeRedaction) {
|
||||
// resizing logic happens in ManualChangesApplicationService.
|
||||
resized = true;
|
||||
}
|
||||
|
||||
if (manualChange instanceof ManualImageRecategorization recategorization) {
|
||||
// recategorization logic happens in ManualChangesApplicationService.
|
||||
recategorized = true;
|
||||
// this is only relevant for ManualEntities. Image and TextEntity is recategorized in the ManualChangesApplicationService.
|
||||
type = recategorization.getType();
|
||||
}
|
||||
|
||||
descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass()));
|
||||
}
|
||||
changed = false;
|
||||
}
|
||||
|
||||
|
||||
public void addChange(BaseAnnotation manualChange) {
|
||||
|
||||
changed = true;
|
||||
manualChanges.add(manualChange);
|
||||
}
|
||||
|
||||
|
||||
public void addChanges(List<BaseAnnotation> manualChangeLog) {
|
||||
|
||||
changed = true;
|
||||
manualChanges.addAll(manualChangeLog);
|
||||
}
|
||||
|
||||
|
||||
public Optional<String> getLegalBasis() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return legalBasis == null ? Optional.empty() : Optional.of(legalBasis);
|
||||
}
|
||||
|
||||
|
||||
public Optional<String> getType() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return type == null ? Optional.empty() : Optional.of(type);
|
||||
}
|
||||
|
||||
|
||||
public Optional<String> getSection() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return section == null ? Optional.empty() : Optional.of(section);
|
||||
}
|
||||
|
||||
|
||||
public Optional<String> getValue() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return value == null ? Optional.empty() : Optional.of(value);
|
||||
}
|
||||
|
||||
|
||||
public Optional<Boolean> getApplied() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return applied == null ? Optional.empty() : Optional.of(applied);
|
||||
}
|
||||
|
||||
|
||||
public Optional<Boolean> getRemoved() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return removed == null ? Optional.empty() : Optional.of(removed);
|
||||
}
|
||||
|
||||
|
||||
public Optional<Boolean> getIgnored() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return ignored == null ? Optional.empty() : Optional.of(ignored);
|
||||
}
|
||||
|
||||
|
||||
public Optional<Boolean> getResized() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return resized == null ? Optional.empty() : Optional.of(resized);
|
||||
}
|
||||
|
||||
|
||||
public Optional<Boolean> getRecategorized() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return recategorized == null ? Optional.empty() : Optional.of(recategorized);
|
||||
}
|
||||
|
||||
|
||||
public List<String> getDescriptions() {
|
||||
|
||||
calculateCurrentOverride();
|
||||
return descriptions == null ? Collections.emptyList() : descriptions;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
@ -18,6 +19,10 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
|
||||
public static final String FINAL_TYPE = "FINAL";
|
||||
public static final String ELIMINATION_RULE_TYPE = "X";
|
||||
private static final List<String> RULE_TYPE_PRIORITIES = List.of(FINAL_TYPE, ELIMINATION_RULE_TYPE);
|
||||
|
||||
@Builder.Default
|
||||
RuleIdentifier ruleIdentifier = RuleIdentifier.empty();
|
||||
@Builder.Default
|
||||
@ -26,8 +31,11 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
String legalBasis = "";
|
||||
boolean applied;
|
||||
boolean writeValueWithLineBreaks;
|
||||
boolean removed;
|
||||
boolean ignored;
|
||||
boolean resized;
|
||||
@Builder.Default
|
||||
Set<RedactionEntity> references = Collections.emptySet();
|
||||
Set<TextEntity> references = Collections.emptySet();
|
||||
|
||||
|
||||
public static MatchedRule empty() {
|
||||
@ -39,32 +47,40 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
@Override
|
||||
public int compareTo(MatchedRule matchedRule) {
|
||||
|
||||
// Only the highest ranked rule is actually applied, this method defines the highest order.
|
||||
// First, it compares the Rule Type, RULE_TYPE_PRIORITIES defines the order of types.
|
||||
// Types not in the list have the lowest priority.
|
||||
// The ones in the list are technical exceptions and should override any other Rule.
|
||||
// Aside from them Entities should never match from more than one type!
|
||||
// E.g. a CBI_author entity should **always** only match CBI.*.* rules.
|
||||
// Otherwise, something went wrong with the rules. :)
|
||||
RuleIdentifier otherRuleIdentifier = matchedRule.getRuleIdentifier();
|
||||
if (!Objects.equals(ruleIdentifier.type(), otherRuleIdentifier.type())) {
|
||||
if (Objects.equals(otherRuleIdentifier.type(), "MAN")) {
|
||||
return 1;
|
||||
}
|
||||
if (Objects.equals(ruleIdentifier.type(), "MAN")) {
|
||||
return -1;
|
||||
}
|
||||
if (Objects.equals(otherRuleIdentifier.type(), "X")) {
|
||||
return 1;
|
||||
}
|
||||
if (Objects.equals(ruleIdentifier.type(), "X")) {
|
||||
return -1;
|
||||
}
|
||||
boolean thisInList = RULE_TYPE_PRIORITIES.contains(this.getRuleIdentifier().type());
|
||||
boolean otherInList = RULE_TYPE_PRIORITIES.contains(otherRuleIdentifier.type());
|
||||
|
||||
// Compare the types
|
||||
if (thisInList && !otherInList) {
|
||||
return -1;
|
||||
} else if (!thisInList && otherInList) {
|
||||
return 1;
|
||||
} else if (thisInList && otherInList) {
|
||||
int thisIndex = RULE_TYPE_PRIORITIES.indexOf(this.getRuleIdentifier().type());
|
||||
int otherIndex = RULE_TYPE_PRIORITIES.indexOf(otherRuleIdentifier.type());
|
||||
return Integer.compare(thisIndex, otherIndex);
|
||||
}
|
||||
// Then compare the unit
|
||||
if (!Objects.equals(otherRuleIdentifier.unit(), getRuleIdentifier().unit())) {
|
||||
return otherRuleIdentifier.unit() - ruleIdentifier.unit();
|
||||
return Integer.compare(otherRuleIdentifier.unit(), ruleIdentifier.unit());
|
||||
}
|
||||
return otherRuleIdentifier.id() - ruleIdentifier.id();
|
||||
// Then compare the id inside the unit
|
||||
return Integer.compare(otherRuleIdentifier.id(), ruleIdentifier.id());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return "MatchedRule[" + "ruleIdentifier=" + ruleIdentifier + ", " + "reason=" + reason + ", " + "legalBasis=" + legalBasis + ", " + "applied=" + applied + ", " + "writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", " + "references=" + references + ']';
|
||||
return "MatchedRule[ruleIdentifier=" + ruleIdentifier + ", reason=" + reason + ", legalBasis=" + legalBasis + ", applied=" + applied + ", writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", references=" + references + ']';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -13,12 +13,13 @@ import lombok.experimental.FieldDefaults;
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class RedactionPosition {
|
||||
public class PositionOnPage {
|
||||
|
||||
// Each entry in this list corresponds to an entry in the redaction log, this means:
|
||||
// A single entity might be represented by multiple redaction log entries
|
||||
// This is due to the RedactionLog only being able to handle a single page per entry.
|
||||
final String id;
|
||||
Page page;
|
||||
// Each entry in this list corresponds to an entry in the redaction log, this means:
|
||||
// An entity might be represented by multiple redaction log entries
|
||||
List<Rectangle2D> rectanglePerLine;
|
||||
|
||||
}
|
||||
@ -11,9 +11,9 @@ import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -28,29 +28,28 @@ import lombok.experimental.FieldDefaults;
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class RedactionEntity implements MatchedRuleHolder {
|
||||
public class TextEntity implements Entity {
|
||||
|
||||
// initial values
|
||||
// primary key
|
||||
@EqualsAndHashCode.Include
|
||||
final Boundary boundary;
|
||||
final TextRange textRange;
|
||||
@EqualsAndHashCode.Include
|
||||
final String type;
|
||||
@EqualsAndHashCode.Include
|
||||
final EntityType entityType;
|
||||
// primary key end
|
||||
|
||||
// empty defaults
|
||||
boolean removed;
|
||||
boolean ignored;
|
||||
@Builder.Default
|
||||
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
@Builder.Default
|
||||
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
boolean resized;
|
||||
boolean skipRemoveEntitiesContainedInLarger;
|
||||
boolean dictionaryEntry;
|
||||
boolean dossierDictionaryEntry;
|
||||
|
||||
@Builder.Default
|
||||
Set<Engine> engines = new HashSet<>();
|
||||
|
||||
@Builder.Default
|
||||
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
|
||||
// inferred on graph insertion
|
||||
String value;
|
||||
@ -58,15 +57,15 @@ public class RedactionEntity implements MatchedRuleHolder {
|
||||
String textAfter;
|
||||
@Builder.Default
|
||||
Set<Page> pages = new HashSet<>();
|
||||
List<RedactionPosition> redactionPositionsPerPage;
|
||||
List<PositionOnPage> positionsOnPagePerPage;
|
||||
@Builder.Default
|
||||
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
||||
SemanticNode deepestFullyContainingNode;
|
||||
|
||||
|
||||
public static RedactionEntity initialEntityNode(Boundary boundary, String type, EntityType entityType) {
|
||||
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType) {
|
||||
|
||||
return RedactionEntity.builder().type(type).entityType(entityType).boundary(boundary).build();
|
||||
return TextEntity.builder().type(type).entityType(entityType).textRange(textRange).build();
|
||||
}
|
||||
|
||||
|
||||
@ -102,7 +101,7 @@ public class RedactionEntity implements MatchedRuleHolder {
|
||||
|
||||
public String getValueWithLineBreaks() {
|
||||
|
||||
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getBoundary());
|
||||
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
|
||||
}
|
||||
|
||||
|
||||
@ -113,14 +112,14 @@ public class RedactionEntity implements MatchedRuleHolder {
|
||||
intersectingNodes = new LinkedList<>();
|
||||
deepestFullyContainingNode = null;
|
||||
pages = new HashSet<>();
|
||||
removed = true;
|
||||
remove("FINAL.0.0", "removed completely");
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionPosition> getRedactionPositionsPerPage() {
|
||||
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||
|
||||
if (redactionPositionsPerPage == null || redactionPositionsPerPage.isEmpty()) {
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = deepestFullyContainingNode.getTextBlock().getPositionsPerPage(boundary);
|
||||
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = deepestFullyContainingNode.getTextBlock().getPositionsPerPage(textRange);
|
||||
|
||||
Page firstPage = rectanglesPerLinePerPage.keySet()
|
||||
.stream()
|
||||
@ -128,37 +127,37 @@ public class RedactionEntity implements MatchedRuleHolder {
|
||||
.orElseThrow(() -> new RuntimeException("No Positions found on any page!"));
|
||||
|
||||
String id = IdBuilder.buildId(pages, rectanglesPerLinePerPage.values().stream().flatMap(Collection::stream).toList(), type, entityType.name());
|
||||
redactionPositionsPerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildRedactionPosition(firstPage, id, entry)).toList();
|
||||
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildPositionOnPage(firstPage, id, entry)).toList();
|
||||
}
|
||||
return redactionPositionsPerPage;
|
||||
return positionsOnPagePerPage;
|
||||
}
|
||||
|
||||
|
||||
private static RedactionPosition buildRedactionPosition(Page firstPage, String id, Map.Entry<Page, List<Rectangle2D>> entry) {
|
||||
private static PositionOnPage buildPositionOnPage(Page firstPage, String id, Map.Entry<Page, List<Rectangle2D>> entry) {
|
||||
|
||||
if (entry.getKey().equals(firstPage)) {
|
||||
return new RedactionPosition(id, entry.getKey(), entry.getValue());
|
||||
return new PositionOnPage(id, entry.getKey(), entry.getValue());
|
||||
} else {
|
||||
return new RedactionPosition(id + "-" + entry.getKey().getNumber(), entry.getKey(), entry.getValue());
|
||||
return new PositionOnPage(id + "-" + entry.getKey().getNumber(), entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public boolean containedBy(RedactionEntity redactionEntity) {
|
||||
public boolean containedBy(TextEntity textEntity) {
|
||||
|
||||
return this.boundary.containedBy(redactionEntity.getBoundary());
|
||||
return this.textRange.containedBy(textEntity.getTextRange());
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(RedactionEntity redactionEntity) {
|
||||
public boolean contains(TextEntity textEntity) {
|
||||
|
||||
return this.boundary.contains(redactionEntity.getBoundary());
|
||||
return this.textRange.contains(textEntity.getTextRange());
|
||||
}
|
||||
|
||||
|
||||
public boolean intersects(RedactionEntity redactionEntity) {
|
||||
public boolean intersects(TextEntity textEntity) {
|
||||
|
||||
return this.boundary.intersects(redactionEntity.getBoundary());
|
||||
return this.textRange.intersects(textEntity.getTextRange());
|
||||
}
|
||||
|
||||
|
||||
@ -176,7 +175,7 @@ public class RedactionEntity implements MatchedRuleHolder {
|
||||
|
||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||
|
||||
return getRedactionPositionsPerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
return getPositionsOnPagePerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
}
|
||||
|
||||
|
||||
@ -187,7 +186,7 @@ public class RedactionEntity implements MatchedRuleHolder {
|
||||
sb.append("Entity[\"");
|
||||
sb.append(value);
|
||||
sb.append("\", ");
|
||||
sb.append(boundary);
|
||||
sb.append(textRange);
|
||||
sb.append(", pages[");
|
||||
pages.forEach(page -> {
|
||||
sb.append(page.getNumber());
|
||||
@ -11,7 +11,7 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
@ -34,7 +34,7 @@ public class Document implements GenericSemanticNode {
|
||||
Integer numberOfPages;
|
||||
TextBlock textBlock;
|
||||
@Builder.Default
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
@Builder.Default
|
||||
static final SectionIdentifier sectionIdentifier = SectionIdentifier.document();
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -34,7 +34,7 @@ public class Footer implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -4,7 +4,7 @@ import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
@ -34,7 +34,7 @@ public class Header implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -5,7 +5,7 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
@ -33,7 +33,7 @@ public class Headline implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -9,10 +9,11 @@ import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRuleHolder;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
@ -29,7 +30,7 @@ import lombok.experimental.FieldDefaults;
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Image implements GenericSemanticNode, MatchedRuleHolder {
|
||||
public class Image implements GenericSemanticNode, Entity {
|
||||
|
||||
List<Integer> treeId;
|
||||
String id;
|
||||
@ -38,12 +39,12 @@ public class Image implements GenericSemanticNode, MatchedRuleHolder {
|
||||
boolean transparent;
|
||||
Rectangle2D position;
|
||||
|
||||
boolean removed;
|
||||
boolean ignored;
|
||||
|
||||
@Builder.Default
|
||||
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
|
||||
@Builder.Default
|
||||
ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
@EqualsAndHashCode.Exclude
|
||||
Page page;
|
||||
|
||||
@ -52,7 +53,7 @@ public class Image implements GenericSemanticNode, MatchedRuleHolder {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -1,11 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
@ -40,7 +39,7 @@ public class Page {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
|
||||
@ -5,7 +5,7 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -29,7 +29,7 @@ public class Paragraph implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -4,7 +4,7 @@ import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
@ -32,7 +32,7 @@ public class Section implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -13,9 +13,9 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
@ -43,11 +43,11 @@ public interface SemanticNode {
|
||||
|
||||
/**
|
||||
* Any Node maintains its own Set of Entities.
|
||||
* This Set contains all Entities whose boundary intersects the boundary of this node.
|
||||
* This Set contains all Entities whose TextRange intersects the TextRange of this node.
|
||||
*
|
||||
* @return Set of all Entities associated with this Node
|
||||
*/
|
||||
Set<RedactionEntity> getEntities();
|
||||
Set<TextEntity> getEntities();
|
||||
|
||||
|
||||
/**
|
||||
@ -72,16 +72,16 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Each AtomicTextBlock is assigned a page, so to get the pages for this boundary, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
|
||||
* Each AtomicTextBlock is assigned a page, so to get the pages for this TextRange, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
|
||||
*
|
||||
* @return Set of PageNodes this node appears on.
|
||||
*/
|
||||
default Set<Page> getPages(Boundary boundary) {
|
||||
default Set<Page> getPages(TextRange textRange) {
|
||||
|
||||
if (!getBoundary().contains(boundary)) {
|
||||
throw new IllegalArgumentException(format("%s which was used to query for pages is not contained in the %s of this node!", boundary, getBoundary()));
|
||||
if (!getTextRange().contains(textRange)) {
|
||||
throw new IllegalArgumentException(format("%s which was used to query for pages is not contained in the %s of this node!", textRange, getTextRange()));
|
||||
}
|
||||
return getTextBlock().getPages(boundary);
|
||||
return getTextBlock().getPages(textRange);
|
||||
}
|
||||
|
||||
|
||||
@ -215,7 +215,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfType(String type) {
|
||||
|
||||
return getEntities().stream().filter(RedactionEntity::isActive).anyMatch(redactionEntity -> redactionEntity.getType().equals(type));
|
||||
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> redactionEntity.getType().equals(type));
|
||||
}
|
||||
|
||||
|
||||
@ -228,7 +228,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean hasEntitiesOfAnyType(String... types) {
|
||||
|
||||
return getEntities().stream().filter(RedactionEntity::isActive).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.getType().equals(type)));
|
||||
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.getType().equals(type)));
|
||||
}
|
||||
|
||||
|
||||
@ -242,8 +242,8 @@ public interface SemanticNode {
|
||||
default boolean hasEntitiesOfAllTypes(String... types) {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(RedactionEntity::isActive)
|
||||
.map(RedactionEntity::getType)
|
||||
.filter(TextEntity::active)
|
||||
.map(TextEntity::getType)
|
||||
.collect(Collectors.toUnmodifiableSet())
|
||||
.containsAll(Arrays.stream(types).toList());
|
||||
}
|
||||
@ -256,9 +256,9 @@ public interface SemanticNode {
|
||||
* @param type string representing the type of entities to return
|
||||
* @return List of RedactionEntities of any the type
|
||||
*/
|
||||
default List<RedactionEntity> getEntitiesOfType(String type) {
|
||||
default List<TextEntity> getEntitiesOfType(String type) {
|
||||
|
||||
return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.getType().equals(type)).toList();
|
||||
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.getType().equals(type)).toList();
|
||||
}
|
||||
|
||||
|
||||
@ -269,9 +269,9 @@ public interface SemanticNode {
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities of any provided type
|
||||
*/
|
||||
default List<RedactionEntity> getEntitiesOfType(List<String> types) {
|
||||
default List<TextEntity> getEntitiesOfType(List<String> types) {
|
||||
|
||||
return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList();
|
||||
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList();
|
||||
}
|
||||
|
||||
|
||||
@ -282,9 +282,9 @@ public interface SemanticNode {
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities that match any of the provided types
|
||||
*/
|
||||
default List<RedactionEntity> getEntitiesOfType(String... types) {
|
||||
default List<TextEntity> getEntitiesOfType(String... types) {
|
||||
|
||||
return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList();
|
||||
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList();
|
||||
}
|
||||
|
||||
|
||||
@ -440,22 +440,22 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* This function is used during insertion of EntityNodes into the graph, it checks if the boundary of the RedactionEntity intersects or even contains the RedactionEntity.
|
||||
* This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity.
|
||||
* It sets the fields accordingly and recursively calls this function on all its children.
|
||||
*
|
||||
* @param redactionEntity RedactionEntity, which is being inserted into the graph
|
||||
* @param textEntity RedactionEntity, which is being inserted into the graph
|
||||
*/
|
||||
default void addThisToEntityIfIntersects(RedactionEntity redactionEntity) {
|
||||
default void addThisToEntityIfIntersects(TextEntity textEntity) {
|
||||
|
||||
TextBlock textBlock = getTextBlock();
|
||||
if (textBlock.getBoundary().intersects(redactionEntity.getBoundary())) {
|
||||
if (textBlock.containsBoundary(redactionEntity.getBoundary())) {
|
||||
redactionEntity.setDeepestFullyContainingNode(this);
|
||||
if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
|
||||
if (textBlock.containsTextRange(textEntity.getTextRange())) {
|
||||
textEntity.setDeepestFullyContainingNode(this);
|
||||
}
|
||||
|
||||
redactionEntity.addIntersectingNode(this);
|
||||
streamChildren().filter(semanticNode -> semanticNode.getBoundary().intersects(redactionEntity.getBoundary()))
|
||||
.forEach(node -> node.addThisToEntityIfIntersects(redactionEntity));
|
||||
textEntity.addIntersectingNode(this);
|
||||
streamChildren().filter(semanticNode -> semanticNode.getTextRange().intersects(textEntity.getTextRange()))
|
||||
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
|
||||
}
|
||||
}
|
||||
|
||||
@ -505,13 +505,13 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* The Boundary is the start and end string offsets in the reading order of the document.
|
||||
* The TextRange is the start and end string offsets in the reading order of the document.
|
||||
*
|
||||
* @return Boundary of this Node's TextBlock
|
||||
* @return TextRange of this Node's TextBlock
|
||||
*/
|
||||
default Boundary getBoundary() {
|
||||
default TextRange getTextRange() {
|
||||
|
||||
return getTextBlock().getBoundary();
|
||||
return getTextBlock().getTextRange();
|
||||
}
|
||||
|
||||
|
||||
@ -522,7 +522,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default int length() {
|
||||
|
||||
return getBoundary().length();
|
||||
return getTextRange().length();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@ import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
@ -38,7 +38,7 @@ public class Table implements SemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
/**
|
||||
@ -47,7 +47,7 @@ public class Table implements SemanticNode {
|
||||
* @param strings Strings to check whether a row contains them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains any of the provided strings
|
||||
*/
|
||||
public Stream<RedactionEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
|
||||
public Stream<TextEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
|
||||
|
||||
return IntStream.range(0, numberOfRows)
|
||||
.boxed()
|
||||
@ -79,7 +79,7 @@ public class Table implements SemanticNode {
|
||||
* @param value the string which the table cell should contain
|
||||
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and the provided value.
|
||||
*/
|
||||
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
|
||||
public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
|
||||
|
||||
List<Integer> vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
|
||||
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
|
||||
@ -94,7 +94,7 @@ public class Table implements SemanticNode {
|
||||
* @param values the strings which the table cell should contain
|
||||
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and any provided value.
|
||||
*/
|
||||
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
|
||||
public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
|
||||
|
||||
List<Integer> colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
|
||||
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
|
||||
@ -109,12 +109,12 @@ public class Table implements SemanticNode {
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
*/
|
||||
public Stream<RedactionEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
|
||||
public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
|
||||
|
||||
List<Integer> rowsWithEntityOfType = getEntities().stream()
|
||||
.filter(RedactionEntity::isActive)
|
||||
.filter(TextEntity::active)
|
||||
.filter(redactionEntity -> types.stream().anyMatch(type -> type.equals(redactionEntity.getType())))
|
||||
.map(RedactionEntity::getIntersectingNodes)
|
||||
.map(TextEntity::getIntersectingNodes)
|
||||
.filter(node -> node instanceof TableCell)
|
||||
.map(node -> (TableCell) node)
|
||||
.map(TableCell::getRow)
|
||||
@ -131,13 +131,13 @@ public class Table implements SemanticNode {
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
*/
|
||||
public Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
|
||||
public Stream<TextEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
|
||||
|
||||
return IntStream.range(0, numberOfRows)
|
||||
.boxed()
|
||||
.filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities)
|
||||
.flatMap(Collection::stream)
|
||||
.filter(RedactionEntity::isActive)
|
||||
.filter(TextEntity::active)
|
||||
.noneMatch(entity -> types.contains(entity.getType())))
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
@ -290,12 +290,12 @@ public class Table implements SemanticNode {
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param type the type of entities to search for
|
||||
* @param redactionEntity the entity, which appears in the row to search
|
||||
* @param textEntity the entity, which appears in the row to search
|
||||
* @return List of all entities of the provided type, which appear in the same row that the provided entity appears in.
|
||||
*/
|
||||
public List<RedactionEntity> getEntitiesOfTypeInSameRow(String type, RedactionEntity redactionEntity) {
|
||||
public List<TextEntity> getEntitiesOfTypeInSameRow(String type, TextEntity textEntity) {
|
||||
|
||||
return redactionEntity.getIntersectingNodes()
|
||||
return textEntity.getIntersectingNodes()
|
||||
.stream()
|
||||
.filter(node -> node instanceof TableCell)
|
||||
.map(node -> (TableCell) node)
|
||||
|
||||
@ -7,7 +7,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
@ -41,7 +41,7 @@ public class TableCell implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
@EqualsAndHashCode.Exclude
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -12,7 +12,7 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
@ -38,7 +38,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
Page page;
|
||||
|
||||
//string coordinates
|
||||
Boundary boundary;
|
||||
TextRange textRange;
|
||||
String searchText;
|
||||
List<Integer> lineBreaks;
|
||||
|
||||
@ -61,7 +61,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
|
||||
return AtomicTextBlock.builder()
|
||||
.id(textBlockIdx)
|
||||
.boundary(new Boundary(stringOffset, stringOffset))
|
||||
.textRange(new TextRange(stringOffset, stringOffset))
|
||||
.searchText("")
|
||||
.lineBreaks(Collections.emptyList())
|
||||
.page(page)
|
||||
@ -82,7 +82,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
.id(atomicTextBlockData.getId())
|
||||
.numberOnPage(atomicTextBlockData.getNumberOnPage())
|
||||
.page(page)
|
||||
.boundary(new Boundary(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
|
||||
.textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
|
||||
.searchText(atomicTextBlockData.getSearchText())
|
||||
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed().toList())
|
||||
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed().toList())
|
||||
@ -98,20 +98,20 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
public Boundary getLineBoundary(int lineNumber) {
|
||||
public TextRange getLineTextRange(int lineNumber) {
|
||||
|
||||
if (lineNumber >= numberOfLines() || lineNumber < 0) {
|
||||
return new Boundary(boundary.start(), boundary.start());
|
||||
return new TextRange(textRange.start(), textRange.start());
|
||||
}
|
||||
if (numberOfLines() == 1) {
|
||||
return boundary;
|
||||
return textRange;
|
||||
}
|
||||
if (lineNumber == 0) {
|
||||
return new Boundary(boundary.start(), lineBreaks.get(0) + boundary.start());
|
||||
return new TextRange(textRange.start(), lineBreaks.get(0) + textRange.start());
|
||||
} else if (lineNumber == numberOfLines() - 1) {
|
||||
return new Boundary(lineBreaks.get(lineBreaks.size() - 1) + boundary.start(), boundary.end());
|
||||
return new TextRange(lineBreaks.get(lineBreaks.size() - 1) + textRange.start(), textRange.end());
|
||||
}
|
||||
return new Boundary(lineBreaks.get(lineNumber - 1) + boundary.start(), lineBreaks.get(lineNumber) + boundary.start());
|
||||
return new TextRange(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start());
|
||||
}
|
||||
|
||||
|
||||
@ -126,9 +126,9 @@ public class AtomicTextBlock implements TextBlock {
|
||||
public int getNextLinebreak(int fromIndex) {
|
||||
|
||||
return lineBreaks.stream()//
|
||||
.filter(linebreak -> linebreak > fromIndex - boundary.start()) //
|
||||
.filter(linebreak -> linebreak > fromIndex - textRange.start()) //
|
||||
.findFirst() //
|
||||
.orElse(searchText.length()) + boundary.start();
|
||||
.orElse(searchText.length()) + textRange.start();
|
||||
}
|
||||
|
||||
|
||||
@ -136,43 +136,43 @@ public class AtomicTextBlock implements TextBlock {
|
||||
public int getPreviousLinebreak(int fromIndex) {
|
||||
|
||||
return lineBreaks.stream()//
|
||||
.filter(linebreak -> linebreak <= fromIndex - boundary.start())//
|
||||
.filter(linebreak -> linebreak <= fromIndex - textRange.start())//
|
||||
.reduce((a, b) -> b)//
|
||||
.orElse(0) + boundary.start();
|
||||
.orElse(0) + textRange.start();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Rectangle2D getPosition(int stringIdx) {
|
||||
|
||||
return positions.get(stringIdxToPositionIdx.get(stringIdx - boundary.start()));
|
||||
return positions.get(stringIdxToPositionIdx.get(stringIdx - textRange.start()));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Rectangle2D> getPositions(Boundary stringBoundary) {
|
||||
public List<Rectangle2D> getPositions(TextRange stringTextRange) {
|
||||
|
||||
if (!containsBoundary(stringBoundary)) {
|
||||
throw new IndexOutOfBoundsException(format("%s is out of bounds for %s", stringBoundary, this.boundary));
|
||||
if (!containsTextRange(stringTextRange)) {
|
||||
throw new IndexOutOfBoundsException(format("%s is out of bounds for %s", stringTextRange, this.textRange));
|
||||
}
|
||||
if (stringBoundary.length() == 0) {
|
||||
if (stringTextRange.length() == 0) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
int startPositionIdx = stringIdxToPositionIdx.get(stringBoundary.start() - this.boundary.start());
|
||||
int startPositionIdx = stringIdxToPositionIdx.get(stringTextRange.start() - this.textRange.start());
|
||||
|
||||
if (stringBoundary.end() == this.boundary.end()) {
|
||||
if (stringTextRange.end() == this.textRange.end()) {
|
||||
return positions.subList(startPositionIdx, positions.size());
|
||||
}
|
||||
|
||||
return positions.subList(startPositionIdx, stringIdxToPositionIdx.get(stringBoundary.end() - this.boundary.start()));
|
||||
return positions.subList(startPositionIdx, stringIdxToPositionIdx.get(stringTextRange.end() - this.textRange.start()));
|
||||
|
||||
}
|
||||
|
||||
|
||||
public Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary) {
|
||||
public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
|
||||
|
||||
List<Rectangle2D> rectanglesPerLine = stringBoundary.split(getAllLineBreaksInBoundary(stringBoundary))
|
||||
List<Rectangle2D> rectanglesPerLine = stringTextRange.split(getAllLineBreaksInBoundary(stringTextRange))
|
||||
.stream()
|
||||
.map(this::getPositions)
|
||||
.map(RectangleTransformations::rectangleBBoxWithGaps)
|
||||
@ -185,18 +185,18 @@ public class AtomicTextBlock implements TextBlock {
|
||||
|
||||
|
||||
@Override
|
||||
public String subSequenceWithLineBreaks(Boundary boundary) {
|
||||
public String subSequenceWithLineBreaks(TextRange textRange) {
|
||||
|
||||
if (boundary.length() == 0 || !getBoundary().contains(boundary)) {
|
||||
if (textRange.length() == 0 || !getTextRange().contains(textRange)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
Set<Integer> lbInBoundary = lineBreaks.stream().map(i -> i + boundary.start()).filter(boundary::contains).collect(Collectors.toSet());
|
||||
if (boundary.end() == getBoundary().end()) {
|
||||
lbInBoundary.add(getBoundary().end());
|
||||
Set<Integer> lbInBoundary = lineBreaks.stream().map(i -> i + textRange.start()).filter(textRange::contains).collect(Collectors.toSet());
|
||||
if (textRange.end() == getTextRange().end()) {
|
||||
lbInBoundary.add(getTextRange().end());
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = boundary.start(); i < boundary.end(); i++) {
|
||||
for (int i = textRange.start(); i < textRange.end(); i++) {
|
||||
char character = this.charAt(i);
|
||||
if (lbInBoundary.contains(i + 1)) {
|
||||
// always plus one, due to the linebreaks being an exclusive end index
|
||||
@ -215,9 +215,9 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
private List<Integer> getAllLineBreaksInBoundary(Boundary boundary) {
|
||||
private List<Integer> getAllLineBreaksInBoundary(TextRange textRange) {
|
||||
|
||||
return getLineBreaks().stream().map(linebreak -> linebreak + this.boundary.start()).filter(boundary::contains).toList();
|
||||
return getLineBreaks().stream().map(linebreak -> linebreak + this.textRange.start()).filter(textRange::contains).toList();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -23,7 +23,7 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
|
||||
List<AtomicTextBlock> atomicTextBlocks;
|
||||
String searchText;
|
||||
Boundary boundary;
|
||||
TextRange textRange;
|
||||
|
||||
|
||||
public static ConcatenatedTextBlock empty() {
|
||||
@ -36,12 +36,12 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
|
||||
this.atomicTextBlocks = new LinkedList<>();
|
||||
if (atomicTextBlocks.isEmpty()) {
|
||||
boundary = new Boundary(-1, -1);
|
||||
textRange = new TextRange(-1, -1);
|
||||
return;
|
||||
}
|
||||
var firstTextBlock = atomicTextBlocks.get(0);
|
||||
this.atomicTextBlocks.add(firstTextBlock);
|
||||
boundary = new Boundary(firstTextBlock.getBoundary().start(), firstTextBlock.getBoundary().end());
|
||||
textRange = new TextRange(firstTextBlock.getTextRange().start(), firstTextBlock.getTextRange().end());
|
||||
|
||||
atomicTextBlocks.subList(1, atomicTextBlocks.size()).forEach(this::concat);
|
||||
}
|
||||
@ -50,13 +50,13 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
public ConcatenatedTextBlock concat(TextBlock textBlock) {
|
||||
|
||||
if (this.atomicTextBlocks.isEmpty()) {
|
||||
boundary.setStart(textBlock.getBoundary().start());
|
||||
boundary.setEnd(textBlock.getBoundary().end());
|
||||
} else if (boundary.end() != textBlock.getBoundary().start()) {
|
||||
throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary()));
|
||||
textRange.setStart(textBlock.getTextRange().start());
|
||||
textRange.setEnd(textBlock.getTextRange().end());
|
||||
} else if (textRange.end() != textBlock.getTextRange().start()) {
|
||||
throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", textRange, textBlock.getTextRange()));
|
||||
}
|
||||
this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks());
|
||||
boundary.setEnd(textBlock.getBoundary().end());
|
||||
textRange.setEnd(textBlock.getTextRange().end());
|
||||
this.searchText = null;
|
||||
return this;
|
||||
}
|
||||
@ -64,13 +64,13 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlockByStringIndex(int stringIdx) {
|
||||
|
||||
return atomicTextBlocks.stream().filter(textBlock -> textBlock.getBoundary().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new);
|
||||
return atomicTextBlocks.stream().filter(textBlock -> textBlock.getTextRange().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new);
|
||||
}
|
||||
|
||||
|
||||
private List<AtomicTextBlock> getAllAtomicTextBlocksPartiallyInStringBoundary(Boundary boundary) {
|
||||
private List<AtomicTextBlock> getAllAtomicTextBlocksPartiallyInStringBoundary(TextRange textRange) {
|
||||
|
||||
return atomicTextBlocks.stream().filter(tb -> tb.getBoundary().intersects(boundary)).toList();
|
||||
return atomicTextBlocks.stream().filter(tb -> tb.getTextRange().intersects(textRange)).toList();
|
||||
}
|
||||
|
||||
|
||||
@ -121,99 +121,99 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
return getAtomicTextBlockByStringIndex(stringIdx).getPosition(stringIdx);
|
||||
}
|
||||
|
||||
public Boundary getLineBoundary(int lineNumber) {
|
||||
public TextRange getLineTextRange(int lineNumber) {
|
||||
|
||||
if (atomicTextBlocks.size() == 1) {
|
||||
return atomicTextBlocks.get(0).getLineBoundary(lineNumber);
|
||||
return atomicTextBlocks.get(0).getLineTextRange(lineNumber);
|
||||
}
|
||||
int lineNumberInCurrentBlock = lineNumber;
|
||||
for (AtomicTextBlock atomicTextBlock : atomicTextBlocks) {
|
||||
if (lineNumberInCurrentBlock < atomicTextBlock.numberOfLines()) {
|
||||
return atomicTextBlock.getLineBoundary(lineNumberInCurrentBlock);
|
||||
return atomicTextBlock.getLineTextRange(lineNumberInCurrentBlock);
|
||||
}
|
||||
lineNumberInCurrentBlock -= atomicTextBlock.numberOfLines();
|
||||
}
|
||||
return new Boundary(boundary.start(), boundary.start());
|
||||
return new TextRange(textRange.start(), textRange.start());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Rectangle2D> getPositions(Boundary stringBoundary) {
|
||||
public List<Rectangle2D> getPositions(TextRange stringTextRange) {
|
||||
|
||||
|
||||
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringBoundary);
|
||||
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange);
|
||||
|
||||
if (textBlocks.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
if (textBlocks.size() == 1) {
|
||||
return textBlocks.get(0).getPositions(stringBoundary);
|
||||
return textBlocks.get(0).getPositions(stringTextRange);
|
||||
}
|
||||
|
||||
AtomicTextBlock firstTextBlock = textBlocks.get(0);
|
||||
List<Rectangle2D> positions = new LinkedList<>(firstTextBlock.getPositions(new Boundary(stringBoundary.start(), firstTextBlock.getBoundary().end())));
|
||||
List<Rectangle2D> positions = new LinkedList<>(firstTextBlock.getPositions(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end())));
|
||||
|
||||
for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
|
||||
positions.addAll(textBlock.getPositions());
|
||||
}
|
||||
|
||||
var lastTextBlock = textBlocks.get(textBlocks.size() - 1);
|
||||
positions.addAll(lastTextBlock.getPositions(new Boundary(lastTextBlock.getBoundary().start(), stringBoundary.end())));
|
||||
positions.addAll(lastTextBlock.getPositions(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
|
||||
|
||||
return positions;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary) {
|
||||
public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
|
||||
|
||||
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringBoundary);
|
||||
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange);
|
||||
|
||||
if (textBlocks.isEmpty()) {
|
||||
return new HashMap<>();
|
||||
}
|
||||
|
||||
if (textBlocks.size() == 1) {
|
||||
return textBlocks.get(0).getPositionsPerPage(stringBoundary);
|
||||
return textBlocks.get(0).getPositionsPerPage(stringTextRange);
|
||||
}
|
||||
|
||||
AtomicTextBlock firstTextBlock = textBlocks.get(0);
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = firstTextBlock.getPositionsPerPage(new Boundary(stringBoundary.start(), firstTextBlock.getBoundary().end()));
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = firstTextBlock.getPositionsPerPage(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end()));
|
||||
|
||||
for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
|
||||
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, textBlock.getPositionsPerPage(textBlock.getBoundary()));
|
||||
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, textBlock.getPositionsPerPage(textBlock.getTextRange()));
|
||||
}
|
||||
|
||||
AtomicTextBlock lastTextBlock = textBlocks.get(textBlocks.size() - 1);
|
||||
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage,
|
||||
lastTextBlock.getPositionsPerPage(new Boundary(lastTextBlock.getBoundary().start(), stringBoundary.end())));
|
||||
lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
|
||||
|
||||
return rectanglesPerLinePerPage;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String subSequenceWithLineBreaks(Boundary boundary) {
|
||||
public String subSequenceWithLineBreaks(TextRange textRange) {
|
||||
|
||||
if (boundary.length() == 0 || !getBoundary().contains(boundary)) {
|
||||
if (textRange.length() == 0 || !getTextRange().contains(textRange)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(boundary);
|
||||
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(textRange);
|
||||
|
||||
if (textBlocks.size() == 1) {
|
||||
return textBlocks.get(0).subSequenceWithLineBreaks(boundary);
|
||||
return textBlocks.get(0).subSequenceWithLineBreaks(textRange);
|
||||
}
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
AtomicTextBlock firstTextBlock = textBlocks.get(0);
|
||||
sb.append(firstTextBlock.subSequenceWithLineBreaks(new Boundary(boundary.start(), firstTextBlock.getBoundary().end())));
|
||||
sb.append(firstTextBlock.subSequenceWithLineBreaks(new TextRange(textRange.start(), firstTextBlock.getTextRange().end())));
|
||||
|
||||
for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
|
||||
sb.append(textBlock.searchTextWithLineBreaks());
|
||||
}
|
||||
|
||||
var lastTextBlock = textBlocks.get(textBlocks.size() - 1);
|
||||
sb.append(lastTextBlock.subSequenceWithLineBreaks(new Boundary(lastTextBlock.getBoundary().start(), boundary.end())));
|
||||
sb.append(lastTextBlock.subSequenceWithLineBreaks(new TextRange(lastTextBlock.getTextRange().start(), textRange.end())));
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@ -10,7 +10,7 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
|
||||
@ -22,7 +22,7 @@ public interface TextBlock extends CharSequence {
|
||||
List<AtomicTextBlock> getAtomicTextBlocks();
|
||||
|
||||
|
||||
Boundary getBoundary();
|
||||
TextRange getTextRange();
|
||||
|
||||
|
||||
int getNextLinebreak(int fromIndex);
|
||||
@ -31,7 +31,7 @@ public interface TextBlock extends CharSequence {
|
||||
int getPreviousLinebreak(int fromIndex);
|
||||
|
||||
|
||||
Boundary getLineBoundary(int lineNumber);
|
||||
TextRange getLineTextRange(int lineNumber);
|
||||
|
||||
|
||||
|
||||
@ -41,13 +41,13 @@ public interface TextBlock extends CharSequence {
|
||||
Rectangle2D getPosition(int stringIdx);
|
||||
|
||||
|
||||
List<Rectangle2D> getPositions(Boundary stringBoundary);
|
||||
List<Rectangle2D> getPositions(TextRange stringTextRange);
|
||||
|
||||
|
||||
Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary);
|
||||
Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange);
|
||||
|
||||
|
||||
String subSequenceWithLineBreaks(Boundary boundary);
|
||||
String subSequenceWithLineBreaks(TextRange textRange);
|
||||
|
||||
|
||||
int numberOfLines();
|
||||
@ -55,13 +55,13 @@ public interface TextBlock extends CharSequence {
|
||||
|
||||
default CharSequence getLine(int lineNumber) {
|
||||
|
||||
return subSequence(getLineBoundary(lineNumber));
|
||||
return subSequence(getLineTextRange(lineNumber));
|
||||
}
|
||||
|
||||
|
||||
default List<Rectangle2D> getLinePositions(int lineNumber) {
|
||||
|
||||
return getPositions(getLineBoundary(lineNumber));
|
||||
return getPositions(getLineTextRange(lineNumber));
|
||||
}
|
||||
|
||||
|
||||
@ -72,13 +72,13 @@ public interface TextBlock extends CharSequence {
|
||||
|
||||
default String searchTextWithLineBreaks() {
|
||||
|
||||
return subSequenceWithLineBreaks(getBoundary());
|
||||
return subSequenceWithLineBreaks(getTextRange());
|
||||
}
|
||||
|
||||
|
||||
default int indexOf(String searchTerm) {
|
||||
|
||||
return indexOf(searchTerm, getBoundary().start());
|
||||
return indexOf(searchTerm, getTextRange().start());
|
||||
}
|
||||
|
||||
|
||||
@ -88,10 +88,10 @@ public interface TextBlock extends CharSequence {
|
||||
}
|
||||
|
||||
|
||||
default Set<Page> getPages(Boundary boundary) {
|
||||
default Set<Page> getPages(TextRange textRange) {
|
||||
|
||||
return getAtomicTextBlocks().stream()
|
||||
.filter(atomicTextBlock -> atomicTextBlock.getBoundary().intersects(boundary))
|
||||
.filter(atomicTextBlock -> atomicTextBlock.getTextRange().intersects(textRange))
|
||||
.map(AtomicTextBlock::getPage)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
}
|
||||
@ -99,38 +99,38 @@ public interface TextBlock extends CharSequence {
|
||||
|
||||
default int indexOf(String searchTerm, int startOffset) {
|
||||
|
||||
int start = getSearchText().indexOf(searchTerm, startOffset - getBoundary().start());
|
||||
int start = getSearchText().indexOf(searchTerm, startOffset - getTextRange().start());
|
||||
if (start == -1) {
|
||||
return -1;
|
||||
}
|
||||
return start + getBoundary().start();
|
||||
return start + getTextRange().start();
|
||||
}
|
||||
|
||||
|
||||
default CharSequence getFirstLine() {
|
||||
|
||||
return subSequence(getBoundary().start(), getNextLinebreak(getBoundary().start()));
|
||||
return subSequence(getTextRange().start(), getNextLinebreak(getTextRange().start()));
|
||||
}
|
||||
|
||||
|
||||
default boolean containsBoundary(Boundary boundary) {
|
||||
default boolean containsTextRange(TextRange textRange) {
|
||||
|
||||
if (boundary.end() < boundary.start()) {
|
||||
throw new IllegalArgumentException(format("Invalid %s, StartIndex must be smaller than EndIndex", boundary));
|
||||
if (textRange.end() < textRange.start()) {
|
||||
throw new IllegalArgumentException(format("Invalid %s, StartIndex must be smaller than EndIndex", textRange));
|
||||
}
|
||||
return getBoundary().contains(boundary);
|
||||
return getTextRange().contains(textRange);
|
||||
}
|
||||
|
||||
|
||||
default boolean containsIndex(int stringIndex) {
|
||||
|
||||
return getBoundary().contains(stringIndex);
|
||||
return getTextRange().contains(stringIndex);
|
||||
}
|
||||
|
||||
|
||||
default CharSequence subSequence(Boundary boundary) {
|
||||
default CharSequence subSequence(TextRange textRange) {
|
||||
|
||||
return subSequence(boundary.start(), boundary.end());
|
||||
return subSequence(textRange.start(), textRange.end());
|
||||
}
|
||||
|
||||
|
||||
@ -147,21 +147,21 @@ public interface TextBlock extends CharSequence {
|
||||
@Override
|
||||
default CharSequence subSequence(int start, int end) {
|
||||
|
||||
return getSearchText().substring(start - getBoundary().start(), end - getBoundary().start());
|
||||
return getSearchText().substring(start - getTextRange().start(), end - getTextRange().start());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
default int length() {
|
||||
|
||||
return getBoundary().length();
|
||||
return getTextRange().length();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
default char charAt(int index) {
|
||||
|
||||
return getSearchText().charAt(index - getBoundary().start());
|
||||
return getSearchText().charAt(index - getTextRange().start());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -18,12 +18,13 @@ import org.kie.api.runtime.KieSession;
|
||||
|
||||
import com.google.common.base.Functions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.ConsecutiveBoundaryCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
@ -55,28 +56,28 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
|
||||
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
|
||||
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
|
||||
startBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() - start.length());
|
||||
@ -87,10 +88,10 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
|
||||
startBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() - start.length());
|
||||
@ -101,10 +102,10 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
|
||||
stopBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() + stop.length());
|
||||
@ -115,10 +116,10 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
|
||||
stopBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() + stop.length());
|
||||
@ -129,10 +130,10 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
|
||||
startBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() - start.length());
|
||||
@ -147,10 +148,10 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
|
||||
startBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() - start.length());
|
||||
@ -165,32 +166,32 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStart, textBlock);
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStop, textBlock);
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStart, textBlock);
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStop, textBlock);
|
||||
|
||||
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStart, 0, textBlock);
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStop, 0, textBlock);
|
||||
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStart, 0, textBlock);
|
||||
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStop, 0, textBlock);
|
||||
|
||||
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenBoundaries(List<Boundary> startBoundaries, List<Boundary> stopBoundaries, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> betweenBoundaries(List<TextRange> startBoundaries, List<TextRange> stopBoundaries, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (startBoundaries.isEmpty() || stopBoundaries.isEmpty()) {
|
||||
return Stream.empty();
|
||||
}
|
||||
List<Boundary> entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries);
|
||||
List<TextRange> entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries);
|
||||
return entityBoundaries.stream()
|
||||
.map(boundary -> boundary.trim(node.getTextBlock()))
|
||||
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
|
||||
@ -200,23 +201,23 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
private static List<Boundary> findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(List<Boundary> startBoundaries, List<Boundary> stopBoundaries) {
|
||||
private static List<TextRange> findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(List<TextRange> startBoundaries, List<TextRange> stopBoundaries) {
|
||||
|
||||
List<Boundary> entityBoundaries = new LinkedList<>();
|
||||
for (Boundary startBoundary : startBoundaries) {
|
||||
Optional<Boundary> optionalStopBoundaryWithMinimalDistance = stopBoundaries.stream()
|
||||
.filter(stopBoundary -> stopBoundary.start() > startBoundary.end())
|
||||
.min(Comparator.comparingInt(Boundary::start));
|
||||
List<TextRange> entityBoundaries = new LinkedList<>();
|
||||
for (TextRange startTextRange : startBoundaries) {
|
||||
Optional<TextRange> optionalStopBoundaryWithMinimalDistance = stopBoundaries.stream()
|
||||
.filter(stopBoundary -> stopBoundary.start() > startTextRange.end())
|
||||
.min(Comparator.comparingInt(TextRange::start));
|
||||
if (optionalStopBoundaryWithMinimalDistance.isEmpty()) {
|
||||
break;
|
||||
}
|
||||
entityBoundaries.add(new Boundary(startBoundary.end(), optionalStopBoundaryWithMinimalDistance.get().start()));
|
||||
entityBoundaries.add(new TextRange(startTextRange.end(), optionalStopBoundaryWithMinimalDistance.get().start()));
|
||||
}
|
||||
return removeOuterOverlappingBoundaries(entityBoundaries);
|
||||
}
|
||||
|
||||
|
||||
private static List<Boundary> removeOuterOverlappingBoundaries(List<Boundary> entityBoundaries) {
|
||||
private static List<TextRange> removeOuterOverlappingBoundaries(List<TextRange> entityBoundaries) {
|
||||
/*
|
||||
In some cases we get boundaries, where one contains the other. This happens for Example when we have two start boundaries and one stop boundary after the two start boundaries.
|
||||
Then we get two boundaries where one is entirely contained in the other. So we want to remove the outer boundary.
|
||||
@ -230,9 +231,9 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
|
||||
.stream()
|
||||
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
|
||||
.map(bounds -> byBoundary(bounds, type, entityType, node))
|
||||
@ -241,11 +242,11 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
SearchImplementation searchImplementation = new SearchImplementation(strings, false);
|
||||
return searchImplementation.getBoundaries(textBlock, node.getBoundary())
|
||||
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
|
||||
@ -255,11 +256,11 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> lineAfterStringsIgnoreCase(List<String> strings, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> lineAfterStringsIgnoreCase(List<String> strings, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
SearchImplementation searchImplementation = new SearchImplementation(strings, true);
|
||||
return searchImplementation.getBoundaries(textBlock, node.getBoundary())
|
||||
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
|
||||
@ -269,7 +270,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
return RedactionSearchUtility.findBoundariesByString(string, textBlock)
|
||||
@ -282,7 +283,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
return RedactionSearchUtility.findBoundariesByStringIgnoreCase(string, textBlock)
|
||||
@ -295,7 +296,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) {
|
||||
public Stream<TextEntity> lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) {
|
||||
|
||||
return tableNode.streamTableCells()
|
||||
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findBoundariesByString(string, tableCell.getTextBlock()),
|
||||
@ -306,7 +307,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) {
|
||||
public Stream<TextEntity> lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) {
|
||||
|
||||
return tableNode.streamTableCells()
|
||||
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findBoundariesByStringIgnoreCase(string, tableCell.getTextBlock()),
|
||||
@ -327,7 +328,7 @@ public class EntityCreationService {
|
||||
* @param tableNode the table node
|
||||
* @return a stream of RedactionEntities
|
||||
*/
|
||||
private Stream<RedactionEntity> lineAfterBoundariesAcrossColumns(List<Boundary> boundaries, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
|
||||
private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> boundaries, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
|
||||
|
||||
return boundaries.stream()
|
||||
.map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary)))
|
||||
@ -344,13 +345,13 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Optional<RedactionEntity> semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) {
|
||||
public Optional<TextEntity> semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) {
|
||||
|
||||
var textBlock = semanticNode.getTextBlock();
|
||||
int startIndex = Math.min(textBlock.indexOf(string), 0);
|
||||
var boundary = new Boundary(startIndex, semanticNode.getBoundary().end());
|
||||
var boundary = new TextRange(startIndex, semanticNode.getTextRange().end());
|
||||
if (boundary.length() > 0) {
|
||||
boundary = new Boundary(boundary.start(), boundary.end() - 1);
|
||||
boundary = new TextRange(boundary.start(), boundary.end() - 1);
|
||||
}
|
||||
if (!isValidEntityBoundary(textBlock, boundary)) {
|
||||
return Optional.empty();
|
||||
@ -359,31 +360,31 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegex(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexIgnoreCase(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
@ -393,7 +394,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
@ -403,7 +404,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
@ -413,7 +414,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
public Stream<TextEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
@ -423,7 +424,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock())
|
||||
.stream()
|
||||
@ -433,7 +434,7 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByStringIgnoreCase(keyword, node.getTextBlock())
|
||||
.stream()
|
||||
@ -443,16 +444,16 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
|
||||
public Stream<TextEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) {
|
||||
public Stream<TextEntity> bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
|
||||
.map(SemanticNode::getBoundary)
|
||||
.map(SemanticNode::getTextRange)
|
||||
.collect(new ConsecutiveBoundaryCollector())
|
||||
.stream()
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
@ -461,42 +462,42 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Optional<RedactionEntity> semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
public Optional<TextEntity> semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (!node.containsString(string)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
Boundary boundary = new Boundary(node.getTextBlock().indexOf(string) + string.length(), node.getBoundary().end());
|
||||
return byBoundary(boundary, type, entityType, node);
|
||||
TextRange textRange = new TextRange(node.getTextBlock().indexOf(string) + string.length(), node.getTextRange().end());
|
||||
return byBoundary(textRange, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Optional<RedactionEntity> bySemanticNode(SemanticNode node, String type, EntityType entityType) {
|
||||
public Optional<TextEntity> bySemanticNode(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
Boundary boundary = node.getTextBlock().getBoundary();
|
||||
TextRange textRange = node.getTextBlock().getTextRange();
|
||||
|
||||
if (boundary.length() > 0) {
|
||||
boundary = new Boundary(boundary.start(), boundary.end() - 1);
|
||||
if (textRange.length() > 0) {
|
||||
textRange = new TextRange(textRange.start(), textRange.end() - 1);
|
||||
}
|
||||
if (!isValidEntityBoundary(node.getTextBlock(), boundary)) {
|
||||
if (!isValidEntityBoundary(node.getTextBlock(), textRange)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return byBoundary(boundary, type, entityType, node);
|
||||
return byBoundary(textRange, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Optional<RedactionEntity> byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
public Optional<TextEntity> byPrefixExpansionRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedStart = RedactionSearchUtility.getExpandedStartByRegex(entity, regexPattern);
|
||||
return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
|
||||
return byBoundary(new TextRange(expandedStart, entity.getTextRange().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
|
||||
}
|
||||
|
||||
|
||||
public Optional<RedactionEntity> bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
public Optional<TextEntity> bySuffixExpansionRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedEnd = RedactionSearchUtility.getExpandedEndByRegex(entity, regexPattern);
|
||||
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
|
||||
return byBoundary(new Boundary(entity.getBoundary().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
|
||||
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getTextRange().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
|
||||
return byBoundary(new TextRange(entity.getTextRange().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
|
||||
}
|
||||
|
||||
|
||||
@ -514,19 +515,19 @@ public class EntityCreationService {
|
||||
* If the document already contains an equal redaction entity, then the original Entity is returned.
|
||||
* Also inserts the Entity into the kieSession.
|
||||
*
|
||||
* @param boundary The boundary of the redaction entity.
|
||||
* @param textRange The boundary of the redaction entity.
|
||||
* @param type The type of the redaction entity.
|
||||
* @param entityType The entity type of the redaction entity.
|
||||
* @param node The semantic node to associate with the redaction entity.
|
||||
* @return An Optional containing the redaction entity, or the previous entity if the entity already exists.
|
||||
*/
|
||||
public Optional<RedactionEntity> byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
|
||||
public Optional<TextEntity> byBoundary(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (!node.getBoundary().contains(boundary)) {
|
||||
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", boundary, node.getBoundary(), node));
|
||||
if (!node.getTextRange().contains(textRange)) {
|
||||
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
|
||||
}
|
||||
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
|
||||
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
|
||||
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
|
||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
|
||||
if (node.getEntities().contains(entity)) {
|
||||
return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngine(Engine.RULE)).findAny();
|
||||
}
|
||||
@ -537,16 +538,16 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity forceByBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
|
||||
public TextEntity forceByBoundary(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
|
||||
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
|
||||
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
|
||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
|
||||
addEntityToGraph(entity, node);
|
||||
return entity;
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity mergeEntitiesOfSameType(List<RedactionEntity> entitiesToMerge, String type, EntityType entityType, SemanticNode node) {
|
||||
public TextEntity mergeEntitiesOfSameType(List<TextEntity> entitiesToMerge, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (!allEntitiesIntersectAndHaveSameTypes(entitiesToMerge)) {
|
||||
throw new IllegalArgumentException("Provided entities can not be merged, since they do not intersect or are not the same type!" + entitiesToMerge);
|
||||
@ -558,14 +559,17 @@ public class EntityCreationService {
|
||||
return entitiesToMerge.get(0);
|
||||
}
|
||||
|
||||
RedactionEntity mergedEntity = RedactionEntity.initialEntityNode(Boundary.merge(entitiesToMerge.stream().map(RedactionEntity::getBoundary).toList()), type, entityType);
|
||||
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream().map(TextEntity::getTextRange).toList()), type, entityType);
|
||||
mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet()));
|
||||
entitiesToMerge.stream().map(RedactionEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
|
||||
entitiesToMerge.stream().map(TextEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
|
||||
entitiesToMerge.stream()
|
||||
.map(TextEntity::getManualOverwrite)
|
||||
.map(ManualChangeOverwrite::getManualChangeLog)
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(manualChange -> mergedEntity.getManualOverwrite().addChange(manualChange));
|
||||
|
||||
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDictionaryEntry));
|
||||
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDossierDictionaryEntry));
|
||||
mergedEntity.setIgnored(entitiesToMerge.stream().allMatch(RedactionEntity::isIgnored));
|
||||
mergedEntity.setRemoved(entitiesToMerge.stream().allMatch(RedactionEntity::isRemoved));
|
||||
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDictionaryEntry));
|
||||
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDossierDictionaryEntry));
|
||||
|
||||
addEntityToGraph(mergedEntity, node);
|
||||
insertToKieSession(mergedEntity);
|
||||
@ -573,28 +577,27 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byEntities(List<RedactionEntity> entities, String type, EntityType entityType, SemanticNode node) {
|
||||
public Stream<TextEntity> copyEntities(List<TextEntity> entities, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return entities.stream().map(entity -> byEntity(type, entityType, node, entity));
|
||||
return entities.stream().map(entity -> copyEntity(entity, type, entityType, node));
|
||||
}
|
||||
|
||||
|
||||
private RedactionEntity byEntity(String type, EntityType entityType, SemanticNode node, RedactionEntity entity) {
|
||||
public TextEntity copyEntity(TextEntity entity, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
RedactionEntity newEntity = RedactionEntity.initialEntityNode(entity.getBoundary(), type, entityType);
|
||||
TextEntity newEntity = TextEntity.initialEntityNode(entity.getTextRange(), type, entityType);
|
||||
newEntity.addEngines(entity.getEngines());
|
||||
newEntity.addMatchedRules(entity.getMatchedRuleList());
|
||||
newEntity.getManualOverwrite().addChanges(entity.getManualOverwrite().getManualChangeLog());
|
||||
newEntity.setDictionaryEntry(entity.isDictionaryEntry());
|
||||
newEntity.setDossierDictionaryEntry(entity.isDossierDictionaryEntry());
|
||||
newEntity.setIgnored(entity.isIgnored());
|
||||
newEntity.setRemoved(entity.isRemoved());
|
||||
addEntityToGraph(newEntity, node);
|
||||
insertToKieSession(newEntity);
|
||||
return newEntity;
|
||||
}
|
||||
|
||||
|
||||
private void insertToKieSession(RedactionEntity mergedEntity) {
|
||||
public void insertToKieSession(TextEntity mergedEntity) {
|
||||
|
||||
if (kieSession != null) {
|
||||
kieSession.insert(mergedEntity);
|
||||
@ -602,25 +605,25 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
|
||||
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
var entity = forceByBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode);
|
||||
var entity = forceByBoundary(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode);
|
||||
entity.addEngine(Engine.NER);
|
||||
insertToKieSession(entity);
|
||||
return entity;
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
var entity = forceByBoundary(nerEntity.boundary(), type, entityType, semanticNode);
|
||||
var entity = forceByBoundary(nerEntity.textRange(), type, entityType, semanticNode);
|
||||
entity.addEngine(Engine.NER);
|
||||
insertToKieSession(entity);
|
||||
return entity;
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
public Stream<TextEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.map(boundary -> forceByBoundary(boundary, type, entityType, semanticNode))
|
||||
@ -629,28 +632,28 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity byTableCellAsHighlight(TableCell tableCell, String type, EntityType entityType) {
|
||||
public TextEntity byTableCellAsHighlight(TableCell tableCell, String type, EntityType entityType) {
|
||||
|
||||
RedactionEntity highlightEntity = RedactionEntity.initialEntityNode(new Boundary(tableCell.getBoundary().start(), tableCell.getBoundary().start()), type, entityType);
|
||||
TextEntity highlightEntity = TextEntity.initialEntityNode(new TextRange(tableCell.getTextRange().start(), tableCell.getTextRange().start()), type, entityType);
|
||||
|
||||
String positionId = IdBuilder.buildId(tableCell.getBBox().keySet(), tableCell.getBBox().values().stream().toList(), type, entityType.name());
|
||||
highlightEntity.setRedactionPositionsPerPage(tableCell.getBBox()
|
||||
highlightEntity.setPositionsOnPagePerPage(tableCell.getBBox()
|
||||
.entrySet()
|
||||
.stream()
|
||||
.map(entry -> new RedactionPosition(positionId, entry.getKey(), List.of(entry.getValue())))
|
||||
.map(entry -> new PositionOnPage(positionId, entry.getKey(), List.of(entry.getValue())))
|
||||
.toList());
|
||||
addEntityToGraph(highlightEntity, tableCell);
|
||||
return highlightEntity;
|
||||
}
|
||||
|
||||
|
||||
public boolean isValidEntityBoundary(TextBlock textBlock, Boundary boundary) {
|
||||
public boolean isValidEntityBoundary(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return boundary.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, boundary);
|
||||
return textRange.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, textRange);
|
||||
}
|
||||
|
||||
|
||||
public void addEntityToGraph(RedactionEntity entity, SemanticNode node) {
|
||||
public void addEntityToGraph(TextEntity entity, SemanticNode node) {
|
||||
|
||||
DocumentTree documentTree = node.getDocumentTree();
|
||||
try {
|
||||
@ -667,10 +670,10 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
private void addEntityToGraph(RedactionEntity entity, DocumentTree documentTree) {
|
||||
private void addEntityToGraph(TextEntity entity, DocumentTree documentTree) {
|
||||
|
||||
SemanticNode containingNode = documentTree.childNodes(Collections.emptyList())
|
||||
.filter(node -> node.getTextBlock().containsBoundary(entity.getBoundary()))
|
||||
.filter(node -> node.getTextBlock().containsTextRange(entity.getTextRange()))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new NoSuchElementException("No containing Node found!"));
|
||||
|
||||
@ -684,30 +687,30 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
private static void addToPages(RedactionEntity entity) {
|
||||
private static void addToPages(TextEntity entity) {
|
||||
|
||||
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getBoundary());
|
||||
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
|
||||
entity.getPages().addAll(pages);
|
||||
pages.forEach(page -> page.getEntities().add(entity));
|
||||
}
|
||||
|
||||
|
||||
private static void addEntityToNodeEntitySets(RedactionEntity entity) {
|
||||
private static void addEntityToNodeEntitySets(TextEntity entity) {
|
||||
|
||||
entity.getIntersectingNodes().forEach(node -> node.getEntities().add(entity));
|
||||
}
|
||||
|
||||
|
||||
private static boolean allEntitiesIntersectAndHaveSameTypes(List<RedactionEntity> entitiesToMerge) {
|
||||
private static boolean allEntitiesIntersectAndHaveSameTypes(List<TextEntity> entitiesToMerge) {
|
||||
|
||||
if (entitiesToMerge.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
RedactionEntity previousEntity = entitiesToMerge.get(0);
|
||||
for (RedactionEntity redactionEntity : entitiesToMerge.subList(1, entitiesToMerge.size())) {
|
||||
boolean typeMatches = redactionEntity.getType().equals(previousEntity.getType());
|
||||
boolean entityTypeMatches = redactionEntity.getEntityType().equals(previousEntity.getEntityType());
|
||||
boolean intersects = redactionEntity.intersects(previousEntity);
|
||||
TextEntity previousEntity = entitiesToMerge.get(0);
|
||||
for (TextEntity textEntity : entitiesToMerge.subList(1, entitiesToMerge.size())) {
|
||||
boolean typeMatches = textEntity.getType().equals(previousEntity.getType());
|
||||
boolean entityTypeMatches = textEntity.getEntityType().equals(previousEntity.getEntityType());
|
||||
boolean intersects = textEntity.intersects(previousEntity);
|
||||
if (!typeMatches || !entityTypeMatches || !intersects) {
|
||||
return false;
|
||||
}
|
||||
@ -716,9 +719,9 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
private static Boundary toLineAfterBoundary(TextBlock textBlock, Boundary boundary) {
|
||||
private static TextRange toLineAfterBoundary(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return new Boundary(boundary.end(), textBlock.getNextLinebreak(boundary.end())).trim(textBlock);
|
||||
return new TextRange(textRange.end(), textBlock.getNextLinebreak(textRange.end())).trim(textBlock);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -6,7 +6,7 @@ import java.util.Objects;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
@ -19,17 +19,17 @@ public class EntityEnrichmentService {
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
|
||||
public void enrichEntity(RedactionEntity entity, TextBlock textBlock) {
|
||||
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
|
||||
|
||||
entity.setValue(textBlock.subSequence(entity.getBoundary()).toString());
|
||||
entity.setTextAfter(findTextAfter(entity.getBoundary().end(), textBlock));
|
||||
entity.setTextBefore(findTextBefore(entity.getBoundary().start(), textBlock));
|
||||
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
|
||||
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
|
||||
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
|
||||
}
|
||||
|
||||
|
||||
private String findTextAfter(int index, TextBlock textBlock) {
|
||||
|
||||
int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getBoundary().end());
|
||||
int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().end());
|
||||
String textAfter = textBlock.subSequence(index, endOffset).toString();
|
||||
if (!textAfter.isBlank()) {
|
||||
List<String> wordsAfter = splitToWordsAndRemoveEmptyWords(textAfter);
|
||||
@ -44,7 +44,7 @@ public class EntityEnrichmentService {
|
||||
|
||||
private String findTextBefore(int index, TextBlock textBlock) {
|
||||
|
||||
int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getBoundary().start());
|
||||
int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().start());
|
||||
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
|
||||
if (!textBefore.isBlank()) {
|
||||
List<String> wordsBefore = splitToWordsAndRemoveEmptyWords(textBefore);
|
||||
|
||||
@ -0,0 +1,95 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.services;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
public class ManualChangesApplicationService {
|
||||
|
||||
private final EntityCreationService entityCreationService;
|
||||
|
||||
|
||||
public void recategorize(Entity entityToBeReCategorized, ManualImageRecategorization manualImageRecategorization) {
|
||||
|
||||
if (entityToBeReCategorized instanceof Image image) {
|
||||
image.setImageType(ImageType.fromString(manualImageRecategorization.getType()));
|
||||
return;
|
||||
}
|
||||
// need to create a new entity and copy over all values, since type is part of the primary key for entities and should never be changed!
|
||||
if (entityToBeReCategorized instanceof TextEntity textEntity) {
|
||||
TextEntity recategorizedEntity = entityCreationService.copyEntity(textEntity, manualImageRecategorization.getType(), textEntity.getEntityType(), textEntity.getDeepestFullyContainingNode());
|
||||
recategorizedEntity.setPositionsOnPagePerPage(textEntity.getPositionsOnPagePerPage());
|
||||
recategorizedEntity.getManualOverwrite().addChange(manualImageRecategorization);
|
||||
textEntity.removeFromGraph();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void resizeEntityAndReinsert(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) {
|
||||
|
||||
PositionOnPage positionOnPageToBeResized = entityToBeResized.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.filter(redactionPosition -> redactionPosition.getId().equals(manualResizeRedaction.getAnnotationId()))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new NoSuchElementException("No redaction position with matching annotation id found!"));
|
||||
|
||||
positionOnPageToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions().stream().map(ManualChangesApplicationService::toRectangle2D).toList());
|
||||
|
||||
int newStartOffset;
|
||||
if (manualResizeRedaction.getValue().length() > entityToBeResized.getValue().length()) {
|
||||
newStartOffset = entityToBeResized.getTextRange().start() - manualResizeRedaction.getValue().indexOf(entityToBeResized.getValue());
|
||||
} else {
|
||||
newStartOffset = entityToBeResized.getTextRange().start() + entityToBeResized.getValue().indexOf(manualResizeRedaction.getValue());
|
||||
}
|
||||
|
||||
// need to reinsert the entity, due to the boundary having changed.
|
||||
removeAndUpdateAndReInsertEntity(entityToBeResized, manualResizeRedaction, newStartOffset);
|
||||
entityToBeResized.getManualOverwrite().addChange(manualResizeRedaction);
|
||||
}
|
||||
|
||||
|
||||
private void removeAndUpdateAndReInsertEntity(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction, int newStartOffset) {
|
||||
|
||||
SemanticNode nodeToInsertInto = entityToBeResized.getDeepestFullyContainingNode().getDocumentTree().getRoot().getNode();
|
||||
entityToBeResized.getIntersectingNodes().forEach(node -> node.getEntities().remove(this));
|
||||
entityToBeResized.getPages().forEach(page -> page.getEntities().remove(this));
|
||||
entityToBeResized.setIntersectingNodes(new LinkedList<>());
|
||||
entityToBeResized.setDeepestFullyContainingNode(null);
|
||||
entityToBeResized.setPages(new HashSet<>());
|
||||
entityToBeResized.getTextRange().setStart(newStartOffset);
|
||||
entityToBeResized.getTextRange().setEnd(newStartOffset + manualResizeRedaction.getValue().length());
|
||||
entityCreationService.addEntityToGraph(entityToBeResized, nodeToInsertInto);
|
||||
}
|
||||
|
||||
|
||||
public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) {
|
||||
|
||||
if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) {
|
||||
return;
|
||||
}
|
||||
var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions().stream().map(ManualChangesApplicationService::toRectangle2D).toList());
|
||||
image.setPosition(bBox);
|
||||
image.getManualOverwrite().addChange(manualResizeRedaction);
|
||||
}
|
||||
|
||||
|
||||
private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rect) {
|
||||
|
||||
return new Rectangle2D.Double(rect.getTopLeftX() - rect.getWidth(), rect.getTopLeftY() - rect.getHeight(), rect.getWidth(), rect.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,64 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.services;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
public class ManualRedactionApplicationService {
|
||||
|
||||
private final EntityCreationService entityCreationService;
|
||||
|
||||
|
||||
public void resizeEntityAndReinsert(RedactionEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) {
|
||||
|
||||
RedactionPosition redactionPositionToBeResized = entityToBeResized.getRedactionPositionsPerPage()
|
||||
.stream()
|
||||
.filter(redactionPosition -> redactionPosition.getId().equals(manualResizeRedaction.getAnnotationId()))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new NoSuchElementException("No redaction position with matching annotation id found!"));
|
||||
|
||||
redactionPositionToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions().stream().map(ManualRedactionApplicationService::toRectangle2D).toList());
|
||||
|
||||
int newStartOffset;
|
||||
if (manualResizeRedaction.getValue().length() > entityToBeResized.getValue().length()) {
|
||||
newStartOffset = entityToBeResized.getBoundary().start() - manualResizeRedaction.getValue().indexOf(entityToBeResized.getValue());
|
||||
} else {
|
||||
newStartOffset = entityToBeResized.getBoundary().start() + entityToBeResized.getValue().indexOf(manualResizeRedaction.getValue());
|
||||
}
|
||||
|
||||
SemanticNode nodeToInsertInto = entityToBeResized.getDeepestFullyContainingNode().getDocumentTree().getRoot().getNode();
|
||||
entityToBeResized.removeFromGraph();
|
||||
entityToBeResized.setResized(true);
|
||||
entityToBeResized.setRemoved(false);
|
||||
entityToBeResized.setIgnored(false);
|
||||
entityToBeResized.getBoundary().setStart(newStartOffset);
|
||||
entityToBeResized.getBoundary().setEnd(newStartOffset + manualResizeRedaction.getValue().length());
|
||||
entityCreationService.addEntityToGraph(entityToBeResized, nodeToInsertInto);
|
||||
}
|
||||
|
||||
|
||||
public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) {
|
||||
|
||||
if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) {
|
||||
return;
|
||||
}
|
||||
var bBox = RectangleTransformations.rectangle2DBBox(manualResizeRedaction.getPositions().stream().map(ManualRedactionApplicationService::toRectangle2D).toList());
|
||||
image.setPosition(bBox);
|
||||
}
|
||||
|
||||
|
||||
private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rect) {
|
||||
|
||||
return new Rectangle2D.Double(rect.getTopLeftX() - rect.getWidth(), rect.getTopLeftY() - rect.getHeight(), rect.getWidth(), rect.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
@ -12,8 +12,6 @@ import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collector;
|
||||
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
|
||||
|
||||
@ -22,7 +20,6 @@ import lombok.NoArgsConstructor;
|
||||
|
||||
public class RectangleTransformations {
|
||||
|
||||
|
||||
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
|
||||
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
|
||||
@ -44,15 +41,6 @@ public class RectangleTransformations {
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) {
|
||||
|
||||
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
|
||||
|
||||
return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector());
|
||||
|
||||
@ -9,8 +9,8 @@ import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
|
||||
@ -51,60 +51,60 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
public static Boundary findFirstBoundary(String regexPattern, CharSequence searchText) {
|
||||
public static TextRange findFirstBoundary(String regexPattern, CharSequence searchText) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
|
||||
Matcher matcher = pattern.matcher(searchText);
|
||||
if (matcher.find()) {
|
||||
return new Boundary(matcher.start(), matcher.end());
|
||||
return new TextRange(matcher.start(), matcher.end());
|
||||
}
|
||||
throw new IllegalArgumentException(format("Charsequence %s does not contain any matches for pattern %s", searchText, regexPattern));
|
||||
}
|
||||
|
||||
|
||||
public static int getExpandedEndByRegex(RedactionEntity entity, String regexPattern) {
|
||||
public static int getExpandedEndByRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedEnd;
|
||||
if (anyMatch(entity.getTextAfter(), regexPattern)) {
|
||||
Boundary postfixBoundary = findFirstBoundary(regexPattern, entity.getTextAfter());
|
||||
expandedEnd = postfixBoundary.end() + entity.getBoundary().end();
|
||||
TextRange postfixTextRange = findFirstBoundary(regexPattern, entity.getTextAfter());
|
||||
expandedEnd = postfixTextRange.end() + entity.getTextRange().end();
|
||||
} else {
|
||||
expandedEnd = entity.getBoundary().end();
|
||||
expandedEnd = entity.getTextRange().end();
|
||||
}
|
||||
return expandedEnd;
|
||||
}
|
||||
|
||||
|
||||
public static int getExpandedStartByRegex(RedactionEntity entity, String regexPattern) {
|
||||
public static int getExpandedStartByRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedStart;
|
||||
if (anyMatch(entity.getTextBefore(), regexPattern)) {
|
||||
Boundary prefixBoundary = findFirstBoundary(regexPattern, entity.getTextBefore());
|
||||
expandedStart = prefixBoundary.start() + entity.getBoundary().start() - entity.getTextBefore().length();
|
||||
TextRange prefixTextRange = findFirstBoundary(regexPattern, entity.getTextBefore());
|
||||
expandedStart = prefixTextRange.start() + entity.getTextRange().start() - entity.getTextBefore().length();
|
||||
} else {
|
||||
expandedStart = entity.getBoundary().start();
|
||||
expandedStart = entity.getTextRange().start();
|
||||
}
|
||||
return expandedStart;
|
||||
}
|
||||
|
||||
public static Boundary findBoundaryOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
|
||||
public static TextRange findBoundaryOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
|
||||
|
||||
List<Boundary> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineBoundary).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList();
|
||||
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineTextRange).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList();
|
||||
if (lineBoundaries.isEmpty()) {
|
||||
return new Boundary(textBlock.getBoundary().start(), textBlock.getBoundary().start());
|
||||
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
|
||||
}
|
||||
return Boundary.merge(lineBoundaries);
|
||||
return TextRange.merge(lineBoundaries);
|
||||
}
|
||||
|
||||
|
||||
private static boolean isWithinYRange(double maxY, double minY, TextBlock textBlock, Boundary lineBoundary) {
|
||||
private static boolean isWithinYRange(double maxY, double minY, TextBlock textBlock, TextRange lineTextRange) {
|
||||
|
||||
Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineBoundary));
|
||||
Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange));
|
||||
return lineBBox.getMinY() < maxY && minY < lineBBox.getMaxY();
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByRegex(String regexPattern, TextBlock textBlock) {
|
||||
public static List<TextRange> findBoundariesByRegex(String regexPattern, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
|
||||
return getBoundariesByPattern(textBlock, 0, pattern);
|
||||
@ -112,68 +112,68 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByRegex(String regexPattern, int group, TextBlock textBlock) {
|
||||
public static List<TextRange> findBoundariesByRegex(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
|
||||
return getBoundariesByPattern(textBlock, group, pattern);
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) {
|
||||
public static List<TextRange> findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false);
|
||||
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
public static List<TextRange> findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true);
|
||||
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
public static List<TextRange> findBoundariesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, true);
|
||||
return getBoundariesByPattern(textBlock, group, pattern);
|
||||
}
|
||||
|
||||
|
||||
private static List<Boundary> getBoundariesByPattern(TextBlock textBlock, int group, Pattern pattern) {
|
||||
private static List<TextRange> getBoundariesByPattern(TextBlock textBlock, int group, Pattern pattern) {
|
||||
|
||||
Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getBoundary()));
|
||||
List<Boundary> boundaries = new LinkedList<>();
|
||||
Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getTextRange()));
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start()));
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
|
||||
private static List<Boundary> getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) {
|
||||
private static List<TextRange> getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) {
|
||||
|
||||
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
|
||||
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
|
||||
List<Boundary> boundaries = new LinkedList<>();
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start()));
|
||||
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByString(String searchString, TextBlock textBlock) {
|
||||
public static List<TextRange> findBoundariesByString(String searchString, TextBlock textBlock) {
|
||||
|
||||
List<Boundary> boundaries = new LinkedList<>();
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
for (int index = textBlock.indexOf(searchString); index >= 0; index = textBlock.indexOf(searchString, index + 1)) {
|
||||
boundaries.add(new Boundary(index, index + searchString.length()));
|
||||
boundaries.add(new TextRange(index, index + searchString.length()));
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByStringIgnoreCase(String searchString, TextBlock textBlock) {
|
||||
public static List<TextRange> findBoundariesByStringIgnoreCase(String searchString, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Pattern.compile(Pattern.quote(searchString), Pattern.CASE_INSENSITIVE);
|
||||
return getBoundariesByPattern(textBlock, 0, pattern);
|
||||
|
||||
@ -20,15 +20,15 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
|
||||
|
||||
@ -49,85 +49,85 @@ public class CustomEntityCreationAdapter {
|
||||
}
|
||||
|
||||
|
||||
public List<EntityIdentifier> toRedactionEntity(RedactionLog redactionLog, SemanticNode node) {
|
||||
public List<ManualEntity> toRedactionEntity(RedactionLog redactionLog, SemanticNode node) {
|
||||
|
||||
List<EntityIdentifier> entityIdentifiers = redactionLog.getRedactionLogEntry().stream().map(EntityIdentifier::fromRedactionLogEntry).toList();
|
||||
return toRedactionEntity(entityIdentifiers, node);
|
||||
List<ManualEntity> manualEntities = redactionLog.getRedactionLogEntry().stream().map(ManualEntity::fromRedactionLogEntry).toList();
|
||||
return toRedactionEntity(manualEntities, node);
|
||||
}
|
||||
|
||||
|
||||
public List<EntityIdentifier> createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set<ManualRedactionEntry> manualRedactionEntries, SemanticNode node) {
|
||||
public List<ManualEntity> createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set<ManualRedactionEntry> manualRedactionEntries, SemanticNode node) {
|
||||
|
||||
List<EntityIdentifier> entityIdentifiers = manualRedactionEntries.stream()
|
||||
List<ManualEntity> manualEntities = manualRedactionEntries.stream()
|
||||
.filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary()))
|
||||
.map(EntityIdentifier::fromManualRedactionEntry)
|
||||
.map(ManualEntity::fromManualRedactionEntry)
|
||||
.toList();
|
||||
|
||||
return toRedactionEntity(entityIdentifiers, node);
|
||||
return toRedactionEntity(manualEntities, node);
|
||||
}
|
||||
|
||||
|
||||
private List<EntityIdentifier> toRedactionEntity(List<EntityIdentifier> entityIdentifiers, SemanticNode node) {
|
||||
private List<ManualEntity> toRedactionEntity(List<ManualEntity> manualEntities, SemanticNode node) {
|
||||
|
||||
Set<Integer> pageNumbers = entityIdentifiers.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
|
||||
Set<String> entryValues = entityIdentifiers.stream().map(EntityIdentifier::getValue).map(String::toLowerCase).collect(Collectors.toSet());
|
||||
Set<Integer> pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
|
||||
Set<String> entryValues = manualEntities.stream().map(ManualEntity::getValue).map(String::toLowerCase).collect(Collectors.toSet());
|
||||
|
||||
Map<String, List<RedactionEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues);
|
||||
Map<String, List<TextEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues);
|
||||
|
||||
List<EntityIdentifier> notFoundEntityIdentifiers = new LinkedList<>();
|
||||
for (EntityIdentifier entityIdentifier : entityIdentifiers) {
|
||||
Optional<RedactionEntity> optionalRedactionEntity = findClosestEntityAndReturnEmptyIfNotFound(entityIdentifier, tempEntitiesByValue);
|
||||
List<ManualEntity> notFoundManualEntities = new LinkedList<>();
|
||||
for (ManualEntity manualEntity : manualEntities) {
|
||||
Optional<TextEntity> optionalRedactionEntity = findClosestEntityAndReturnEmptyIfNotFound(manualEntity, tempEntitiesByValue);
|
||||
if (optionalRedactionEntity.isEmpty()) {
|
||||
notFoundEntityIdentifiers.add(entityIdentifier);
|
||||
notFoundManualEntities.add(manualEntity);
|
||||
continue;
|
||||
}
|
||||
createCorrectEntity(entityIdentifier, node, optionalRedactionEntity.get().getBoundary());
|
||||
createCorrectEntity(manualEntity, node, optionalRedactionEntity.get().getTextRange());
|
||||
}
|
||||
tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph);
|
||||
return notFoundEntityIdentifiers;
|
||||
tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
|
||||
return notFoundManualEntities;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Deletes the temp Entity and creates a RedactionEntity with correct values, based on the given parameters.
|
||||
*
|
||||
* @param entityIdentifier The entity identifier for the RedactionEntity.
|
||||
* @param manualEntity The manual entity describing the TextEntity to create.
|
||||
* @param node The SemanticNode associated with the RedactionEntity.
|
||||
* @param closestBoundary The closest Boundary to the RedactionEntity.
|
||||
* @param closestTextRange The closest TextRange to the TextEntity.
|
||||
*/
|
||||
private void createCorrectEntity(EntityIdentifier entityIdentifier, SemanticNode node, Boundary closestBoundary) {
|
||||
private void createCorrectEntity(ManualEntity manualEntity, SemanticNode node, TextRange closestTextRange) {
|
||||
|
||||
RedactionEntity correctEntity = entityCreationService.forceByBoundary(closestBoundary, entityIdentifier.getType(), entityIdentifier.getEntityType(), node);
|
||||
TextEntity correctEntity = entityCreationService.forceByBoundary(closestTextRange, manualEntity.getType(), manualEntity.getEntityType(), node);
|
||||
|
||||
if (entityIdentifier.isApplied()) {
|
||||
correctEntity.force(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason(), entityIdentifier.getLegalBasis());
|
||||
if (manualEntity.isApplied()) {
|
||||
correctEntity.force(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis());
|
||||
} else {
|
||||
correctEntity.skip(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason());
|
||||
correctEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason());
|
||||
}
|
||||
correctEntity.setDictionaryEntry(entityIdentifier.isDictionaryEntry());
|
||||
correctEntity.setDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry());
|
||||
correctEntity.setDictionaryEntry(manualEntity.isDictionaryEntry());
|
||||
correctEntity.setDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry());
|
||||
correctEntity.getManualOverwrite().addChanges(manualEntity.getManualOverwrite().getManualChangeLog());
|
||||
|
||||
|
||||
// TODO: refactor this away! This is only needed so the persistence service can apply the correct comment and ManualChanges.
|
||||
// It would be better, if the redaction-service returns a map of annotationId changes and the persistence service then migrates the annotationIds of Comments and ManualRedactions
|
||||
List<RedactionPosition> redactionPositionsWithIdOfManualRedaction = new ArrayList<>(correctEntity.getRedactionPositionsPerPage().size());
|
||||
for (RedactionPosition redactionPosition : correctEntity.getRedactionPositionsPerPage()) {
|
||||
redactionPositionsWithIdOfManualRedaction.add(new RedactionPosition(entityIdentifier.getId(), redactionPosition.getPage(), redactionPosition.getRectanglePerLine()));
|
||||
// AnnotationIds must match the IDs in the add requests, or comments break. Maybe think about migrating IDs on the fly!
|
||||
List<PositionOnPage> redactionPositionsWithIdOfManualOnPage = new ArrayList<>(correctEntity.getPositionsOnPagePerPage().size());
|
||||
for (PositionOnPage positionOnPage : correctEntity.getPositionsOnPagePerPage()) {
|
||||
redactionPositionsWithIdOfManualOnPage.add(new PositionOnPage(manualEntity.getId(), positionOnPage.getPage(), positionOnPage.getRectanglePerLine()));
|
||||
}
|
||||
correctEntity.setRedactionPositionsPerPage(redactionPositionsWithIdOfManualRedaction);
|
||||
correctEntity.setPositionsOnPagePerPage(redactionPositionsWithIdOfManualOnPage);
|
||||
}
|
||||
|
||||
|
||||
private Optional<RedactionEntity> findClosestEntityAndReturnEmptyIfNotFound(EntityIdentifier identifier, Map<String, List<RedactionEntity>> entitiesWithSameValue) {
|
||||
private Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(ManualEntity identifier, Map<String, List<TextEntity>> entitiesWithSameValue) {
|
||||
|
||||
List<RedactionEntity> possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT));
|
||||
List<TextEntity> possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT));
|
||||
|
||||
if (entityIdentifierValueNotFound(possibleEntities)) {
|
||||
log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue());
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
Optional<RedactionEntity> optionalClosestEntity = possibleEntities.stream()
|
||||
Optional<TextEntity> optionalClosestEntity = possibleEntities.stream()
|
||||
.filter(entity -> pagesMatch(entity, identifier.getEntityPosition()))
|
||||
.min(Comparator.comparingDouble(entity -> calculateMinDistance(identifier.getEntityPosition(), entity)));
|
||||
|
||||
@ -136,14 +136,14 @@ public class CustomEntityCreationAdapter {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
RedactionEntity closestEntity = optionalClosestEntity.get();
|
||||
TextEntity closestEntity = optionalClosestEntity.get();
|
||||
double distance = calculateMinDistance(identifier.getEntityPosition(), closestEntity);
|
||||
if (distance > MATCH_THRESHOLD) {
|
||||
log.warn(format("Distance to closest found entity is %.2f and therefore higher than the threshold of %.2f for \n%s \n%s",
|
||||
distance,
|
||||
MATCH_THRESHOLD,
|
||||
identifier.getEntityPosition(),
|
||||
closestEntity.getRedactionPositionsPerPage()));
|
||||
closestEntity.getPositionsOnPagePerPage()));
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
@ -151,13 +151,13 @@ public class CustomEntityCreationAdapter {
|
||||
}
|
||||
|
||||
|
||||
private static boolean entityIdentifierValueNotFound(List<RedactionEntity> possibleEntities) {
|
||||
private static boolean entityIdentifierValueNotFound(List<TextEntity> possibleEntities) {
|
||||
|
||||
return possibleEntities == null || possibleEntities.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
private Map<String, List<RedactionEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, Set<Integer> pageNumbers, Set<String> entryValues) {
|
||||
private Map<String, List<TextEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, Set<Integer> pageNumbers, Set<String> entryValues) {
|
||||
|
||||
if (!pageNumbers.stream().allMatch(node::onPage)) {
|
||||
throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s",
|
||||
@ -167,28 +167,28 @@ public class CustomEntityCreationAdapter {
|
||||
}
|
||||
SearchImplementation searchImplementation = new SearchImplementation(entryValues, true);
|
||||
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
|
||||
.stream()
|
||||
.map(boundary -> entityCreationService.forceByBoundary(boundary, "temp", EntityType.ENTITY, node))
|
||||
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
|
||||
}
|
||||
|
||||
|
||||
private static boolean allValuesFound(Map<String, List<RedactionEntity>> entitiesByValue, Set<String> entryValues) {
|
||||
private static boolean allValuesFound(Map<String, List<TextEntity>> entitiesByValue, Set<String> entryValues) {
|
||||
|
||||
return entitiesByValue.keySet().equals(entryValues);
|
||||
}
|
||||
|
||||
|
||||
private static boolean pagesMatch(RedactionEntity entity, List<RectangleWithPage> originalPositions) {
|
||||
private static boolean pagesMatch(TextEntity entity, List<RectangleWithPage> originalPositions) {
|
||||
|
||||
Set<Integer> entityPageNumbers = entity.getRedactionPositionsPerPage().stream().map(RedactionPosition::getPage).map(Page::getNumber).collect(Collectors.toSet());
|
||||
Set<Integer> entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet());
|
||||
Set<Integer> originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet());
|
||||
return entityPageNumbers.containsAll(originalPageNumbers);
|
||||
}
|
||||
|
||||
|
||||
private double calculateMinDistance(List<RectangleWithPage> originalPositions, RedactionEntity entity) {
|
||||
private double calculateMinDistance(List<RectangleWithPage> originalPositions, TextEntity entity) {
|
||||
|
||||
if (originalPositions.size() != countRectangles(entity)) {
|
||||
return Double.MAX_VALUE;
|
||||
@ -199,18 +199,18 @@ public class CustomEntityCreationAdapter {
|
||||
}
|
||||
|
||||
|
||||
private static long countRectangles(RedactionEntity entity) {
|
||||
private static long countRectangles(TextEntity entity) {
|
||||
|
||||
return entity.getRedactionPositionsPerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
|
||||
return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
|
||||
}
|
||||
|
||||
|
||||
private double calculateMinDistancePerRectangle(RedactionEntity entity, int pageNumber, Rectangle2D originalRectangle) {
|
||||
private double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) {
|
||||
|
||||
return entity.getRedactionPositionsPerPage()
|
||||
return entity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == pageNumber)
|
||||
.map(RedactionPosition::getRectanglePerLine)
|
||||
.map(PositionOnPage::getRectanglePerLine)
|
||||
.flatMap(Collection::stream)
|
||||
.mapToDouble(rectangle -> calculateDistance(rectangle, originalRectangle))
|
||||
.min()
|
||||
|
||||
@ -4,7 +4,7 @@ import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -37,7 +37,7 @@ public class NerEntities {
|
||||
}
|
||||
|
||||
|
||||
public record NerEntity(String value, Boundary boundary, String type) {
|
||||
public record NerEntity(String value, TextRange textRange, String type) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@ import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
@ -44,7 +44,7 @@ public class NerEntitiesAdapter {
|
||||
|
||||
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSections(document),
|
||||
nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(),
|
||||
new Boundary(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
|
||||
new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
|
||||
nerEntityModel.getType())).toList());
|
||||
}
|
||||
|
||||
@ -63,17 +63,17 @@ public class NerEntitiesAdapter {
|
||||
* @param allowDuplicates allow combining multiple parts of same type
|
||||
* @return A Stream of the combined boundaries
|
||||
*/
|
||||
public Stream<Boundary> combineNerEntities(NerEntities nerEntities,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
boolean allowDuplicates) {
|
||||
public Stream<TextRange> combineNerEntities(NerEntities nerEntities,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
boolean allowDuplicates) {
|
||||
|
||||
List<NerEntities.NerEntity> sortedEntities = nerEntities.getNerEntityList()
|
||||
.stream()
|
||||
.filter(entity -> typesToCombine.contains(entity.type()))
|
||||
.sorted(Comparator.comparingInt(entity -> entity.boundary().start()))
|
||||
.sorted(Comparator.comparingInt(entity -> entity.textRange().start()))
|
||||
.toList();
|
||||
|
||||
if (sortedEntities.isEmpty()) {
|
||||
@ -86,20 +86,20 @@ public class NerEntitiesAdapter {
|
||||
for (NerEntities.NerEntity startEntity : startEntitiesOfEssentialType) {
|
||||
List<NerEntities.NerEntity> currentCluster = new LinkedList<>();
|
||||
entityClusters.add(currentCluster);
|
||||
int lastEndOffset = startEntity.boundary().end();
|
||||
int lastEndOffset = startEntity.textRange().end();
|
||||
|
||||
for (NerEntities.NerEntity entity : sortedEntities) {
|
||||
if (entity.boundary().start() < lastEndOffset) {
|
||||
if (entity.textRange().start() < lastEndOffset) {
|
||||
continue;
|
||||
}
|
||||
if (distanceIsLargerThanMaxDistance(lastEndOffset, entity, maxDistanceBetweenParts) || isDuplicate(currentCluster, entity, allowDuplicates)) {
|
||||
currentCluster = new LinkedList<>();
|
||||
entityClusters.add(currentCluster);
|
||||
currentCluster.add(entity);
|
||||
lastEndOffset = entity.boundary().end();
|
||||
lastEndOffset = entity.textRange().end();
|
||||
} else {
|
||||
currentCluster.add(entity);
|
||||
lastEndOffset = entity.boundary().end();
|
||||
lastEndOffset = entity.textRange().end();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -120,7 +120,7 @@ public class NerEntitiesAdapter {
|
||||
*
|
||||
* @return A Stream of the combined entities of type "CBI_address"
|
||||
*/
|
||||
public Stream<Boundary> combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) {
|
||||
public Stream<TextRange> combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) {
|
||||
|
||||
return combineNerEntities(entityRecognitionEntities,
|
||||
CBI_ADDRESS_ESSENTIAL_TYPES,
|
||||
@ -139,13 +139,13 @@ public class NerEntitiesAdapter {
|
||||
|
||||
private static boolean distanceIsLargerThanMaxDistance(int lastEndOffset, NerEntities.NerEntity entity, int maxDistance) {
|
||||
|
||||
return (entity.boundary().start() - lastEndOffset) > maxDistance;
|
||||
return (entity.textRange().start() - lastEndOffset) > maxDistance;
|
||||
}
|
||||
|
||||
|
||||
private static Boundary toContainingBoundary(List<NerEntities.NerEntity> nerEntities) {
|
||||
private static TextRange toContainingBoundary(List<NerEntities.NerEntity> nerEntities) {
|
||||
|
||||
return Boundary.merge(nerEntities.stream().map(NerEntities.NerEntity::boundary).toList());
|
||||
return TextRange.merge(nerEntities.stream().map(NerEntities.NerEntity::textRange).toList());
|
||||
}
|
||||
|
||||
|
||||
@ -162,7 +162,7 @@ public class NerEntitiesAdapter {
|
||||
|
||||
private static List<Integer> getStringStartOffsetsForMainSections(Document document) {
|
||||
|
||||
return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getBoundary).map(Boundary::start).toList();
|
||||
return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getTextRange).map(TextRange::start).toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,73 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
 * Value holder describing one entity by id, value, page positions, rule identifier and flags.
 * Instances are created from a {@link RedactionLogEntry} or a {@link ManualRedactionEntry}
 * through the two static factories below; all fields are private and final
 * (see {@code @FieldDefaults}) and exposed through Lombok-generated getters.
 */
@Getter
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class EntityIdentifier {
|
||||
|
||||
// must be used for comments to work correctly
|
||||
String id;
|
||||
String value;
|
||||
// One rectangle-with-page per position of the source entry.
List<RectangleWithPage> entityPosition;
|
||||
String ruleIdentifier;
|
||||
String reason;
|
||||
String legalBasis;
|
||||
String type;
|
||||
String section;
|
||||
EntityType entityType;
|
||||
boolean applied;
|
||||
boolean isDictionaryEntry;
|
||||
boolean isDossierDictionaryEntry;
|
||||
boolean rectangle;
|
||||
|
||||
|
||||
/**
 * Builds an identifier from an existing redaction log entry.
 *
 * @param redactionLogEntry the log entry to copy id, value, positions and flags from
 * @return a new identifier whose rule identifier is {@code <type>.<matchedRule>.0}
 */
public static EntityIdentifier fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) {
|
||||
|
||||
// The trailing ".0" is appended unconditionally to the type and matched rule.
String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";
|
||||
List<RectangleWithPage> rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList();
|
||||
return new EntityIdentifier(redactionLogEntry.getId(),
|
||||
redactionLogEntry.getValue(),
|
||||
rectangleWithPages,
|
||||
ruleIdentifier,
|
||||
redactionLogEntry.getReason(),
|
||||
redactionLogEntry.getLegalBasis(),
|
||||
redactionLogEntry.getType(),
|
||||
redactionLogEntry.getSection(),
|
||||
// Recommendations keep their own entity type; everything else is a plain entity.
redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY,
|
||||
redactionLogEntry.isRedacted(),
|
||||
redactionLogEntry.isDictionaryEntry(),
|
||||
redactionLogEntry.isDossierDictionaryEntry(),
|
||||
redactionLogEntry.isRectangle());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds an identifier from a manually added redaction entry.
 * The rule identifier is fixed to {@code "MAN.0.0"}, the entity type to {@code ENTITY},
 * {@code applied} to {@code true} and both dictionary flags to {@code false}.
 *
 * @param manualRedactionEntry the manual entry to copy id, value, positions and texts from
 * @return a new identifier for the manual entry
 */
public static EntityIdentifier fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) {
|
||||
|
||||
List<RectangleWithPage> rectangleWithPages = manualRedactionEntry.getPositions().stream().map(RectangleWithPage::fromAnnotationRectangle).toList();
|
||||
return new EntityIdentifier(manualRedactionEntry.getAnnotationId(),
|
||||
manualRedactionEntry.getValue(),
|
||||
rectangleWithPages,
|
||||
"MAN.0.0",
|
||||
manualRedactionEntry.getReason(),
|
||||
manualRedactionEntry.getLegalBasis(),
|
||||
manualRedactionEntry.getType(),
|
||||
manualRedactionEntry.getSection(),
|
||||
EntityType.ENTITY,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
manualRedactionEntry.isRectangle());
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,91 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.PriorityQueue;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
 * {@link Entity} implementation carrying id, value, page positions, rule identifier and flags
 * of an entry taken from a {@link RedactionLogEntry} or a {@link ManualRedactionEntry}.
 * All fields are private and final (see {@code @FieldDefaults}); instances are created through
 * the Lombok builder, normally via the two static factories below.
 */
@Getter
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ManualEntity implements Entity {
|
||||
|
||||
// must be mapped into a TextEntity as is for comments to work correctly
|
||||
String id;
|
||||
String value;
|
||||
// One rectangle-with-page per position of the source entry.
List<RectangleWithPage> entityPosition;
|
||||
String ruleIdentifier;
|
||||
String reason;
|
||||
String legalBasis;
|
||||
String type;
|
||||
String section;
|
||||
EntityType entityType;
|
||||
boolean applied;
|
||||
boolean isDictionaryEntry;
|
||||
boolean isDossierDictionaryEntry;
|
||||
boolean rectangle;
|
||||
|
||||
// Builder default: an empty queue unless the builder is given one explicitly.
@Builder.Default
|
||||
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
// Builder default: a fresh, empty overwrite unless the builder is given one explicitly.
@Builder.Default
|
||||
ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
|
||||
/**
 * Builds a manual entity from an existing redaction log entry.
 * {@code matchedRuleList} and {@code manualOverwrite} are not set here and therefore
 * take their builder defaults.
 *
 * @param redactionLogEntry the log entry to copy id, value, positions and flags from
 * @return a new entity whose rule identifier is {@code <type>.<matchedRule>.0}
 */
public static ManualEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) {
|
||||
|
||||
// The trailing ".0" is appended unconditionally to the type and matched rule.
String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";
|
||||
List<RectangleWithPage> rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList();
|
||||
return ManualEntity.builder()
|
||||
.id(redactionLogEntry.getId())
|
||||
.value(redactionLogEntry.getValue())
|
||||
.entityPosition(rectangleWithPages)
|
||||
.ruleIdentifier(ruleIdentifier)
|
||||
.reason(redactionLogEntry.getReason())
|
||||
.legalBasis(redactionLogEntry.getLegalBasis())
|
||||
.type(redactionLogEntry.getType())
|
||||
.section(redactionLogEntry.getSection())
|
||||
// Recommendations keep their own entity type; everything else is a plain entity.
.entityType(redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY)
|
||||
.applied(redactionLogEntry.isRedacted())
|
||||
.isDictionaryEntry(redactionLogEntry.isDictionaryEntry())
|
||||
.isDossierDictionaryEntry(redactionLogEntry.isDossierDictionaryEntry())
|
||||
.rectangle(redactionLogEntry.isRectangle())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds a manual entity from a manually added redaction entry.
 * The rule identifier is fixed to {@code "MAN.0.0"}, the entity type to {@code ENTITY},
 * {@code applied} to {@code true} and both dictionary flags to {@code false}.
 * {@code matchedRuleList} is not set and takes its builder default.
 *
 * @param manualRedactionEntry the manual entry to copy id, value, positions and texts from
 * @return a new entity whose manual overwrite already contains {@code manualRedactionEntry}
 */
public static ManualEntity fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) {
|
||||
|
||||
List<RectangleWithPage> rectangleWithPages = manualRedactionEntry.getPositions().stream().map(RectangleWithPage::fromAnnotationRectangle).toList();
|
||||
// Register the originating entry as a change on a new overwrite before handing it to the builder.
ManualChangeOverwrite manualChangeOverwrite = new ManualChangeOverwrite();
|
||||
manualChangeOverwrite.addChange(manualRedactionEntry);
|
||||
return ManualEntity.builder()
|
||||
.id(manualRedactionEntry.getAnnotationId())
|
||||
.value(manualRedactionEntry.getValue())
|
||||
.entityPosition(rectangleWithPages)
|
||||
.ruleIdentifier("MAN.0.0")
|
||||
.reason(manualRedactionEntry.getReason())
|
||||
.legalBasis(manualRedactionEntry.getLegalBasis())
|
||||
.type(manualRedactionEntry.getType())
|
||||
.section(manualRedactionEntry.getSection())
|
||||
.entityType(EntityType.ENTITY)
|
||||
.applied(true)
|
||||
.isDictionaryEntry(false)
|
||||
.isDossierDictionaryEntry(false)
|
||||
.rectangle(manualRedactionEntry.isRectangle())
|
||||
.manualOverwrite(manualChangeOverwrite)
|
||||
.build();
|
||||
}
|
||||
|
||||
}
|
||||
@ -17,7 +17,7 @@ import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
|
||||
import lombok.Data;
|
||||
@ -116,28 +116,28 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
public void recommendEverywhere(RedactionEntity redactionEntity) {
|
||||
public void recommendEverywhere(TextEntity textEntity) {
|
||||
|
||||
addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), false);
|
||||
addLocalDictionaryEntry(textEntity.getType(), textEntity.getValue(), textEntity.getMatchedRuleList(), false);
|
||||
}
|
||||
|
||||
|
||||
public void recommendEverywhereWithLastNameSeparately(RedactionEntity redactionEntity) {
|
||||
public void recommendEverywhereWithLastNameSeparately(TextEntity textEntity) {
|
||||
|
||||
addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), true);
|
||||
addLocalDictionaryEntry(textEntity.getType(), textEntity.getValue(), textEntity.getMatchedRuleList(), true);
|
||||
}
|
||||
|
||||
|
||||
public void addMultipleAuthorsAsRecommendation(RedactionEntity redactionEntity) {
|
||||
public void addMultipleAuthorsAsRecommendation(TextEntity textEntity) {
|
||||
|
||||
String cleanedWord = redactionEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
|
||||
String cleanedWord = textEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
|
||||
Pattern pattern = Patterns.AUTHOR_TABLE_SPLITTER;
|
||||
Matcher matcher = pattern.matcher(cleanedWord);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group().trim();
|
||||
if (match.length() >= 3) {
|
||||
addLocalDictionaryEntry(redactionEntity.getType(), match, redactionEntity.getMatchedRuleList(), true);
|
||||
addLocalDictionaryEntry(textEntity.getType(), match, textEntity.getMatchedRuleList(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -9,7 +9,7 @@ import java.util.stream.Collectors;
|
||||
|
||||
import org.ahocorasick.trie.Trie;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@ -83,29 +83,29 @@ public class SearchImplementation {
|
||||
}
|
||||
|
||||
|
||||
public List<Boundary> getBoundaries(CharSequence text) {
|
||||
public List<TextRange> getBoundaries(CharSequence text) {
|
||||
|
||||
if (this.values.isEmpty()) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
if (this.pattern != null) {
|
||||
return this.pattern.matcher(text).results().map(r -> new Boundary(r.start(), r.end())).collect(Collectors.toList());
|
||||
return this.pattern.matcher(text).results().map(r -> new TextRange(r.start(), r.end())).collect(Collectors.toList());
|
||||
} else {
|
||||
return this.trie.parseText(text).stream().map(r -> new Boundary(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
|
||||
return this.trie.parseText(text).stream().map(r -> new TextRange(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<Boundary> getBoundaries(CharSequence text, Boundary region) {
|
||||
public List<TextRange> getBoundaries(CharSequence text, TextRange region) {
|
||||
|
||||
if (this.values.isEmpty()) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
CharSequence subSequence = text.subSequence(region.start(), region.end());
|
||||
if (this.pattern != null) {
|
||||
return this.pattern.matcher(subSequence).results().map(r -> new Boundary(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList());
|
||||
return this.pattern.matcher(subSequence).results().map(r -> new TextRange(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList());
|
||||
} else {
|
||||
return this.trie.parseText(subSequence).stream().map(r -> new Boundary(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList());
|
||||
return this.trie.parseText(subSequence).stream().map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@ import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.legalbasis.LegalBasis;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
|
||||
@ -29,7 +30,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion;
|
||||
@ -61,6 +62,7 @@ public class AnalyzeService {
|
||||
RedactionServiceSettings redactionServiceSettings;
|
||||
ImportedRedactionService importedRedactionService;
|
||||
SectionFinderService sectionFinderService;
|
||||
ManualRedactionEntryService manualRedactionEntryService;
|
||||
|
||||
FunctionTimerValues redactmanagerAnalyzePagewiseValues;
|
||||
|
||||
@ -82,7 +84,7 @@ public class AnalyzeService {
|
||||
var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId());
|
||||
log.info("Updated Rules to Version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
List<EntityIdentifier> notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
|
||||
List<ManualEntity> notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
|
||||
|
||||
entityRedactionService.addDictionaryEntities(dictionary, document);
|
||||
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
@ -90,7 +92,10 @@ public class AnalyzeService {
|
||||
Set<FileAttribute> addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, wrapper.container(), analyzeRequest, nerEntities);
|
||||
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries);
|
||||
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document,
|
||||
analyzeRequest.getDossierTemplateId(),
|
||||
notFoundManualRedactionEntries,
|
||||
getComments(analyzeRequest));
|
||||
|
||||
List<LegalBasis> legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
|
||||
RedactionLog redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(),
|
||||
@ -109,24 +114,19 @@ public class AnalyzeService {
|
||||
true);
|
||||
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
|
||||
|
||||
return finalizeAnalysis(analyzeRequest,
|
||||
startTime,
|
||||
redactionLog,
|
||||
document.getNumberOfPages(),
|
||||
dictionary.getVersion(),
|
||||
false,
|
||||
addedFileAttributes);
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, document.getNumberOfPages(), dictionary.getVersion(), false, addedFileAttributes);
|
||||
}
|
||||
|
||||
|
||||
private List<EntityIdentifier> addManualRedactionEntriesAndReturnNotFoundEntries(AnalyzeRequest analyzeRequest, Document document) {
|
||||
private static Map<String, List<Comment>> getComments(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
List<EntityIdentifier> notFoundManualRedactionEntries = Collections.emptyList();
|
||||
if (analyzeRequest.getManualRedactions() != null) {
|
||||
notFoundManualRedactionEntries = entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document);
|
||||
log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
if (analyzeRequest.getManualRedactions() == null) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
return notFoundManualRedactionEntries;
|
||||
if (analyzeRequest.getManualRedactions().getComments() == null) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
return analyzeRequest.getManualRedactions().getComments();
|
||||
}
|
||||
|
||||
|
||||
@ -170,7 +170,7 @@ public class AnalyzeService {
|
||||
var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId());
|
||||
log.info("Updated Rules to version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
List<EntityIdentifier> notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
|
||||
List<ManualEntity> notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
@ -186,7 +186,10 @@ public class AnalyzeService {
|
||||
nerEntities);
|
||||
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
List<RedactionLogEntry> newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries);
|
||||
List<RedactionLogEntry> newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document,
|
||||
analyzeRequest.getDossierTemplateId(),
|
||||
notFoundManualRedactionEntries,
|
||||
getComments(analyzeRequest));
|
||||
|
||||
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(),
|
||||
analyzeRequest.getDossierId(),
|
||||
|
||||
@ -36,7 +36,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.KieWrapper;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
|
||||
@ -84,11 +84,11 @@ public class DroolsExecutionService {
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
|
||||
ManualRedactionApplicationService manualRedactionApplicationService = new ManualRedactionApplicationService(entityCreationService);
|
||||
ManualChangesApplicationService manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService);
|
||||
|
||||
kieSession.setGlobal("document", document);
|
||||
kieSession.setGlobal("entityCreationService", entityCreationService);
|
||||
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.setGlobal("dictionary", dictionary);
|
||||
|
||||
kieSession.insert(document);
|
||||
|
||||
@ -9,16 +9,14 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
|
||||
|
||||
@ -74,12 +72,6 @@ public class EntityRedactionService {
|
||||
return allFileAttributes.stream().filter(fileAttribute -> !analyzeRequest.getFileAttributes().contains(fileAttribute)).collect(Collectors.toUnmodifiableSet());
|
||||
}
|
||||
|
||||
public List<EntityIdentifier> addManualAddRedactionEntities(Set<ManualRedactionEntry> manualRedactionEntries, Document document) {
|
||||
|
||||
// Entities are automatically added to the DocumentGraph and don't need to be inserted again.
|
||||
return customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(manualRedactionEntries, document);
|
||||
}
|
||||
|
||||
|
||||
public void addDictionaryEntities(Dictionary dictionary, SemanticNode node) {
|
||||
|
||||
@ -98,7 +90,7 @@ public class EntityRedactionService {
|
||||
boolean isDossierDictionaryEntry) {
|
||||
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
|
||||
searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
|
||||
.stream()
|
||||
.filter(boundary -> entityCreationService.isValidEntityBoundary(node.getTextBlock(), boundary))
|
||||
.map(bounds -> entityCreationService.forceByBoundary(bounds, type, entityType, node))
|
||||
|
||||
@ -0,0 +1,43 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType;
|
||||
|
||||
/**
 * Spring service that converts manual-redaction annotations ({@link BaseAnnotation} subtypes)
 * into {@link ManualChange} objects.
 */
@Service
|
||||
public class ManualChangeFactory {
|
||||
|
||||
/**
 * Converts every annotation in the given list into a {@link ManualChange}, preserving order.
 *
 * @param manualChanges the annotations to convert
 * @param isHint        selects {@code FORCE_HINT} instead of {@code FORCE_REDACT} for
 *                      {@link ManualForceRedaction} annotations; ignored for other types
 * @return the converted changes, collected with {@code Stream.toList()}
 */
public List<ManualChange> toManualChangeList(List<BaseAnnotation> manualChanges, boolean isHint) {
|
||||
|
||||
return manualChanges.stream().map(baseAnnotation -> toManualChange(baseAnnotation, isHint)).toList();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts a single annotation, choosing the {@link ManualRedactionType} from its runtime type.
 * An annotation matching none of the branches is returned exactly as produced by
 * {@code ManualChange.from(baseAnnotation)}.
 *
 * @param baseAnnotation the annotation to convert
 * @param isHint         only consulted for {@link ManualForceRedaction}
 * @return the {@link ManualChange} created for the annotation
 */
private ManualChange toManualChange(BaseAnnotation baseAnnotation, boolean isHint) {
|
||||
|
||||
// NOTE(review): the results of the with...() calls below are discarded; this only works if they
// mutate and return the receiver rather than a modified copy — confirm against ManualChange.
ManualChange manualChange = ManualChange.from(baseAnnotation);
|
||||
if (baseAnnotation instanceof ManualImageRecategorization imageRecategorization) {
|
||||
manualChange.withManualRedactionType(ManualRedactionType.RECATEGORIZE).withChange("type", imageRecategorization.getType());
|
||||
} else if (baseAnnotation instanceof IdRemoval manualRemoval) {
|
||||
manualChange.withManualRedactionType(manualRemoval.isRemoveFromDictionary() ? ManualRedactionType.REMOVE_FROM_DICTIONARY : ManualRedactionType.REMOVE_LOCALLY);
|
||||
} else if (baseAnnotation instanceof ManualForceRedaction) {
|
||||
manualChange.withManualRedactionType(isHint ? ManualRedactionType.FORCE_HINT : ManualRedactionType.FORCE_REDACT);
|
||||
} else if (baseAnnotation instanceof ManualResizeRedaction manualResizeRedact) {
|
||||
manualChange.withManualRedactionType(ManualRedactionType.RESIZE).withChange("value", manualResizeRedact.getValue());
|
||||
} else if (baseAnnotation instanceof ManualRedactionEntry manualRedactionEntry) {
|
||||
manualChange.withManualRedactionType(manualRedactionEntry.isAddToDictionary() ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY)
|
||||
.withChange("value", manualRedactionEntry.getValue());
|
||||
}
|
||||
return manualChange;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,58 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ManualRedactionEntryService {
|
||||
|
||||
private final CustomEntityCreationAdapter customEntityCreationAdapter;
|
||||
|
||||
|
||||
public List<ManualEntity> addManualRedactionEntriesAndReturnNotFoundEntries(AnalyzeRequest analyzeRequest, Document document) {
|
||||
|
||||
List<ManualEntity> notFoundManualRedactionEntries = Collections.emptyList();
|
||||
if (analyzeRequest.getManualRedactions() != null) {
|
||||
notFoundManualRedactionEntries = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(analyzeRequest.getManualRedactions()
|
||||
.getEntriesToAdd(), document);
|
||||
log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
}
|
||||
if (notFoundManualRedactionEntries.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
List<BaseAnnotation> manualChanges = allManualChangesExceptAdd(analyzeRequest.getManualRedactions());
|
||||
for (ManualEntity notFoundManualRedactionEntry : notFoundManualRedactionEntries) {
|
||||
manualChanges.stream()
|
||||
.filter(change -> change.getAnnotationId().equals(notFoundManualRedactionEntry.getId()))
|
||||
.forEach(change -> notFoundManualRedactionEntry.getManualOverwrite().addChange(change));
|
||||
}
|
||||
return notFoundManualRedactionEntries;
|
||||
}
|
||||
|
||||
|
||||
private List<BaseAnnotation> allManualChangesExceptAdd(ManualRedactions manualRedactions) {
|
||||
|
||||
return Stream.of(manualRedactions.getForceRedactions(),
|
||||
manualRedactions.getResizeRedactions(),
|
||||
manualRedactions.getImageRecategorization(),
|
||||
manualRedactions.getIdsToRemove(),
|
||||
manualRedactions.getLegalBasisChanges()).flatMap(Collection::stream).map(baseAnnotation -> (BaseAnnotation) baseAnnotation).toList();
|
||||
}
|
||||
|
||||
}
|
||||
@ -6,22 +6,24 @@ import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogComment;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -32,49 +34,52 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class RedactionLogCreatorService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final ManualChangeFactory manualChangeFactory;
|
||||
|
||||
|
||||
public List<RedactionLogEntry> createRedactionLog(Document document, String dossierTemplateId, List<EntityIdentifier> notFoundManualRedactionEntries) {
|
||||
public List<RedactionLogEntry> createRedactionLog(Document document,
|
||||
String dossierTemplateId,
|
||||
List<ManualEntity> notFoundManualRedactionEntries,
|
||||
Map<String, List<Comment>> comments) {
|
||||
|
||||
List<RedactionLogEntry> entries = new ArrayList<>();
|
||||
Set<String> processedIds = new HashSet<>();
|
||||
document.getEntities()
|
||||
.stream()
|
||||
.filter(RedactionLogCreatorService::isEntityOrRecommendationType)
|
||||
.filter(entity -> !entity.isRemoved())
|
||||
.forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, processedIds, dossierTemplateId)));
|
||||
document.streamAllImages().filter(image -> !image.isRemoved()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId)));
|
||||
notFoundManualRedactionEntries.forEach(entityIdentifier -> entries.add(createRedactionLogEntry(entityIdentifier, dossierTemplateId)));
|
||||
.filter(entity -> !entity.removed())
|
||||
.forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, dossierTemplateId, comments)));
|
||||
document.streamAllImages().filter(image -> !image.removed()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId, comments)));
|
||||
notFoundManualRedactionEntries.forEach(entityIdentifier -> entries.add(createRedactionLogEntry(entityIdentifier, dossierTemplateId, comments)));
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
||||
private static boolean isEntityOrRecommendationType(RedactionEntity redactionEntity) {
|
||||
private static boolean isEntityOrRecommendationType(TextEntity textEntity) {
|
||||
|
||||
return redactionEntity.getEntityType() == EntityType.ENTITY || redactionEntity.getEntityType() == EntityType.RECOMMENDATION;
|
||||
return textEntity.getEntityType() == EntityType.ENTITY || textEntity.getEntityType() == EntityType.RECOMMENDATION;
|
||||
}
|
||||
|
||||
|
||||
private List<RedactionLogEntry> toRedactionLogEntries(RedactionEntity redactionEntity, Set<String> processedIds, String dossierTemplateId) {
|
||||
private List<RedactionLogEntry> toRedactionLogEntries(TextEntity textEntity, String dossierTemplateId, Map<String, List<Comment>> comments) {
|
||||
|
||||
Set<String> processedIds = new HashSet<>();
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
// Duplicates can exist due table extraction columns over multiple rows.
|
||||
for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) {
|
||||
|
||||
for (RedactionPosition redactionPosition : redactionEntity.getRedactionPositionsPerPage()) {
|
||||
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(redactionEntity, dossierTemplateId);
|
||||
|
||||
if (processedIds.contains(redactionPosition.getId())) {
|
||||
// Duplicates should be removed. They might exist due to table extraction duplicating cells spanning multiple columns/rows.
|
||||
if (processedIds.contains(positionOnPage.getId())) {
|
||||
continue;
|
||||
}
|
||||
processedIds.add(positionOnPage.getId());
|
||||
|
||||
processedIds.add(redactionPosition.getId());
|
||||
redactionLogEntry.setId(redactionPosition.getId());
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(textEntity, dossierTemplateId);
|
||||
redactionLogEntry.setId(positionOnPage.getId());
|
||||
redactionLogEntry.setComments(buildRedactionLogComments(comments, positionOnPage.getId()));
|
||||
|
||||
List<Rectangle> rectanglesPerLine = redactionPosition.getRectanglePerLine()
|
||||
List<Rectangle> rectanglesPerLine = positionOnPage.getRectanglePerLine()
|
||||
.stream()
|
||||
.map(rectangle2D -> RectangleTransformations.toRedactionLogRectangle(rectangle2D, redactionPosition.getPage().getNumber()))
|
||||
.map(rectangle2D -> toRedactionLogRectangle(rectangle2D, positionOnPage.getPage().getNumber()))
|
||||
.toList();
|
||||
|
||||
redactionLogEntry.setPositions(rectanglesPerLine);
|
||||
@ -85,88 +90,128 @@ public class RedactionLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(RedactionEntity entity, String dossierTemplateId) {
|
||||
private List<RedactionLogComment> buildRedactionLogComments(Map<String, List<Comment>> commentsPerId, String id) {
|
||||
|
||||
if (!commentsPerId.containsKey(id)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
List<Comment> comments = commentsPerId.get(id);
|
||||
if (comments == null || comments.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return toRedactionLogComments(comments);
|
||||
}
|
||||
|
||||
|
||||
private List<RedactionLogComment> toRedactionLogComments(List<Comment> comments) {
|
||||
|
||||
return comments.stream().map(this::toRedactionLogComment).toList();
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogComment toRedactionLogComment(Comment comment) {
|
||||
|
||||
return new RedactionLogComment(comment.getId(),
|
||||
comment.getUser(),
|
||||
comment.getText(),
|
||||
comment.getAnnotationId(),
|
||||
comment.getFileId(),
|
||||
comment.getDate(),
|
||||
comment.getSoftDeletedTime());
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(TextEntity entity, String dossierTemplateId) {
|
||||
|
||||
Set<String> referenceIds = new HashSet<>();
|
||||
entity.getReferences().stream().filter(RedactionEntity::isActive).forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId())));
|
||||
entity.references().stream().filter(TextEntity::active).forEach(ref -> ref.getPositionsOnPagePerPage().forEach(pos -> referenceIds.add(pos.getId())));
|
||||
int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 0 : entity.getDeepestFullyContainingNode().getTreeId().get(0);
|
||||
|
||||
boolean isHint = isHint(entity.getType(), dossierTemplateId);
|
||||
return RedactionLogEntry.builder()
|
||||
.color(getColor(entity.getType(), dossierTemplateId, entity.isApplied()))
|
||||
.reason(entity.getMatchedRule().getReason())
|
||||
.legalBasis(entity.getMatchedRule().getLegalBasis())
|
||||
.value(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())
|
||||
.color(getColor(entity.getType(), dossierTemplateId, entity.applied()))
|
||||
.reason(entity.buildReasonWithManualChangeDescriptions())
|
||||
.legalBasis(entity.legalBasis())
|
||||
.value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue()))
|
||||
.type(entity.getType())
|
||||
.redacted(entity.isApplied())
|
||||
.isHint(isHint(entity.getType(), dossierTemplateId))
|
||||
.redacted(entity.applied())
|
||||
.isHint(isHint)
|
||||
.isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION))
|
||||
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
|
||||
.section(entity.getDeepestFullyContainingNode().toString())
|
||||
.section(entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString()))
|
||||
.sectionNumber(sectionNumber)
|
||||
.matchedRule(entity.getMatchedRule().getRuleIdentifier().toString())
|
||||
.isDictionaryEntry(entity.isDictionaryEntry())
|
||||
.textAfter(entity.getTextAfter())
|
||||
.textBefore(entity.getTextBefore())
|
||||
.startOffset(entity.getBoundary().start())
|
||||
.endOffset(entity.getBoundary().end())
|
||||
.startOffset(entity.getTextRange().start())
|
||||
.endOffset(entity.getTextRange().end())
|
||||
.isDossierDictionaryEntry(entity.isDossierDictionaryEntry())
|
||||
.engines(entity.getEngines() != null ? entity.getEngines() : Collections.emptySet())
|
||||
.reference(referenceIds)
|
||||
.manualChanges(manualChangeFactory.toManualChangeList(entity.getManualOverwrite().getManualChangeLog(), isHint))
|
||||
.build();
|
||||
}
|
||||
|
||||
public RedactionLogEntry createRedactionLogEntry(EntityIdentifier entityIdentifier, String dossierTemplateId) {
|
||||
List<Integer> pageNumbers = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::pageNumber).toList();
|
||||
List<Rectangle2D> rectanglesPerLine = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList();
|
||||
|
||||
public RedactionLogEntry createRedactionLogEntry(ManualEntity manualEntity, String dossierTemplateId, Map<String, List<Comment>> comments) {
|
||||
|
||||
String type = manualEntity.getManualOverwrite().getType().orElse(manualEntity.getType());
|
||||
boolean isHint = isHint(type, dossierTemplateId);
|
||||
return RedactionLogEntry.builder()
|
||||
.id(entityIdentifier.getId())
|
||||
.color(getColor(entityIdentifier.getType(), dossierTemplateId, entityIdentifier.isApplied()))
|
||||
.reason(entityIdentifier.getReason())
|
||||
.legalBasis(entityIdentifier.getLegalBasis())
|
||||
.value(entityIdentifier.getValue())
|
||||
.type(entityIdentifier.getType())
|
||||
.redacted(entityIdentifier.isApplied())
|
||||
.isHint(isHint(entityIdentifier.getType(), dossierTemplateId))
|
||||
.isRecommendation(entityIdentifier.getEntityType().equals(EntityType.RECOMMENDATION))
|
||||
.isFalsePositive(entityIdentifier.getEntityType().equals(EntityType.FALSE_POSITIVE) || entityIdentifier.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
|
||||
.section(entityIdentifier.getSection())
|
||||
.id(manualEntity.getId())
|
||||
.color(getColor(type, dossierTemplateId, manualEntity.applied()))
|
||||
.reason(manualEntity.buildReasonWithManualChangeDescriptions())
|
||||
.legalBasis(manualEntity.legalBasis())
|
||||
.value(manualEntity.getManualOverwrite().getValue().orElse(manualEntity.getValue()))
|
||||
.type(type)
|
||||
.redacted(manualEntity.applied())
|
||||
.isHint(isHint)
|
||||
.isRecommendation(manualEntity.getEntityType().equals(EntityType.RECOMMENDATION))
|
||||
.isFalsePositive(manualEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) || manualEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
|
||||
.section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection()))
|
||||
.sectionNumber(0)
|
||||
.matchedRule("ManualRedaction")
|
||||
.rectangle(entityIdentifier.isRectangle())
|
||||
.isDictionaryEntry(entityIdentifier.isDictionaryEntry())
|
||||
.rectangle(manualEntity.isRectangle())
|
||||
.isDictionaryEntry(manualEntity.isDictionaryEntry())
|
||||
.isDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry())
|
||||
.textAfter("")
|
||||
.textBefore("")
|
||||
.startOffset(-1)
|
||||
.endOffset(-1)
|
||||
.isDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry())
|
||||
.positions(entityIdentifier.getEntityPosition()
|
||||
.positions(manualEntity.getEntityPosition()
|
||||
.stream()
|
||||
.map(entityPosition -> RectangleTransformations.toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber()))
|
||||
.map(entityPosition -> toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber()))
|
||||
.collect(Collectors.toList()))
|
||||
.engines(Collections.emptySet())
|
||||
.reference(Collections.emptySet())
|
||||
.manualChanges(manualChangeFactory.toManualChangeList(manualEntity.getManualOverwrite().getManualChangeLog(), isHint))
|
||||
.comments(buildRedactionLogComments(comments, manualEntity.getId()))
|
||||
.build();
|
||||
}
|
||||
|
||||
public RedactionLogEntry createRedactionLogEntry(Image image, String dossierTemplateId) {
|
||||
|
||||
String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ROOT);
|
||||
public RedactionLogEntry createRedactionLogEntry(Image image, String dossierTemplateId, Map<String, List<Comment>> comments) {
|
||||
|
||||
String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ENGLISH);
|
||||
boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId);
|
||||
return RedactionLogEntry.builder()
|
||||
.id(image.getId())
|
||||
.color(getColor(image.getImageType().toString().toLowerCase(Locale.ROOT), dossierTemplateId, image.isApplied()))
|
||||
.color(getColor(imageType, dossierTemplateId, image.applied()))
|
||||
.isImage(true)
|
||||
.type(imageType)
|
||||
.redacted(image.isApplied())
|
||||
.reason(image.getMatchedRule().getReason())
|
||||
.legalBasis(image.getMatchedRule().getLegalBasis())
|
||||
.redacted(image.applied())
|
||||
.reason(image.buildReasonWithManualChangeDescriptions())
|
||||
.legalBasis(image.legalBasis())
|
||||
.matchedRule(image.getMatchedRule().getRuleIdentifier().toString())
|
||||
.isHint(dictionaryService.isHint(image.getImageType().toString().toLowerCase(Locale.ROOT), dossierTemplateId))
|
||||
.isHint(isHint)
|
||||
.isDictionaryEntry(false)
|
||||
.isRecommendation(false)
|
||||
.positions(List.of(RectangleTransformations.toRedactionLogRectangle(image.getPosition(), image.getPage().getNumber())))
|
||||
.positions(List.of(toRedactionLogRectangle(image.getPosition(), image.getPage().getNumber())))
|
||||
.sectionNumber(image.getTreeId().get(0))
|
||||
.section(image.getParent().toString())
|
||||
.section(image.getManualOverwrite().getSection().orElse(image.getParent().toString()))
|
||||
.imageHasTransparency(image.isTransparent())
|
||||
.manualChanges(manualChangeFactory.toManualChangeList(image.getManualOverwrite().getManualChangeLog(), isHint))
|
||||
.comments(buildRedactionLogComments(comments, image.getId()))
|
||||
.build();
|
||||
|
||||
}
|
||||
@ -186,4 +231,13 @@ public class RedactionLogCreatorService {
|
||||
return dictionaryService.isHint(type, dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
private Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) {
|
||||
|
||||
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
@ -14,7 +14,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public final class SeparatorUtils {
|
||||
|
||||
private final static Pattern punctuationPattern = Pattern.compile("\\p{Punct}");
|
||||
private final static Set<Character> quotes = Set.of('\'', '\u0022', '\u00AB', '\u00BB', '\u2018', '\u2019', '\u201A', '\u201C', '\u201D', '\u201E', '\u2039', '\u203A');
|
||||
private final static Set<Character> quotes = Set.of('\'', '"', '«', '»', '‘', '’', '‚', '“', '”', '„', '‹', '›');
|
||||
|
||||
private final static Set<Integer> japaneseAltPunctuationMarks = Set.of(65288, 65289, 65294, 65339, 65341, 65371, 65373, 65375, 65376, 12443, 12444, 65309, 65306);
|
||||
|
||||
@ -32,9 +32,9 @@ public final class SeparatorUtils {
|
||||
}
|
||||
|
||||
|
||||
public static boolean isWhiteSpacesOrSeparatorsOnly(TextBlock textBlock, Boundary boundary) {
|
||||
public static boolean isWhiteSpacesOrSeparatorsOnly(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
String stringWithoutWhiteSpace = textBlock.subSequence(boundary).toString().replace(" ", "");
|
||||
String stringWithoutWhiteSpace = textBlock.subSequence(textRange).toString().replace(" ", "");
|
||||
int numberOfSeparators = 0;
|
||||
for (int i = 0; i < stringWithoutWhiteSpace.length(); i++) {
|
||||
if (isSeparator(stringWithoutWhiteSpace.charAt(i))) {
|
||||
@ -45,25 +45,25 @@ public final class SeparatorUtils {
|
||||
}
|
||||
|
||||
|
||||
public static boolean boundaryIsSurroundedBySeparators(TextBlock textBlock, Boundary boundary) {
|
||||
public static boolean boundaryIsSurroundedBySeparators(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return validateStart(textBlock, boundary) && validateEnd(textBlock, boundary) && !isWhiteSpacesOrSeparatorsOnly(textBlock, boundary);
|
||||
return validateStart(textBlock, textRange) && validateEnd(textBlock, textRange) && !isWhiteSpacesOrSeparatorsOnly(textBlock, textRange);
|
||||
}
|
||||
|
||||
|
||||
private static boolean validateEnd(TextBlock textBlock, Boundary boundary) {
|
||||
private static boolean validateEnd(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return boundary.end() == textBlock.getBoundary().end() ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(boundary.end())) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(boundary.end() - 1));
|
||||
return textRange.end() == textBlock.getTextRange().end() ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1));
|
||||
}
|
||||
|
||||
|
||||
private static boolean validateStart(TextBlock textBlock, Boundary boundary) {
|
||||
private static boolean validateStart(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return boundary.start() == textBlock.getBoundary().start() ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(boundary.start() - 1)) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(boundary.start()));
|
||||
return textRange.start() == textBlock.getTextRange().start() ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.start() - 1)) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.start()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -5,10 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.mockito.ArgumentMatchers.anyLong;
|
||||
import static org.mockito.ArgumentMatchers.anyString;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.wildfly.common.Assert.assertTrue;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
@ -19,7 +16,6 @@ import java.nio.file.Paths;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -31,7 +27,6 @@ import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
@ -62,21 +57,13 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSON
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
@ -93,11 +80,6 @@ import lombok.SneakyThrows;
|
||||
public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/rules.drl");
|
||||
@Autowired
|
||||
private EntityEnrichmentService entityEnrichmentService;
|
||||
|
||||
@Autowired
|
||||
private DroolsExecutionService droolsExecutionService;
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
@ -113,11 +95,6 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
}
|
||||
|
||||
}
|
||||
@BeforeEach
|
||||
public void invalidateCaches() {
|
||||
|
||||
// droolsExecutionService.invalidateKieContainerCache();
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
@ -234,7 +211,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
System.out.println("Finished structure analysis");
|
||||
@ -393,7 +370,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
continue loop;
|
||||
}
|
||||
if (redactionLogEntry.getSectionNumber() == section.getTreeId().get(0)) {
|
||||
String value = section.getTextBlock().subSequence(new Boundary(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())).toString();
|
||||
String value = section.getTextBlock().subSequence(new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())).toString();
|
||||
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
|
||||
correctFound++;
|
||||
} else {
|
||||
@ -542,7 +519,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
List<String> valuesInDocument = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(e -> !e.isImage())
|
||||
.map(redactionLogEntry -> new Boundary(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset()))
|
||||
.map(redactionLogEntry -> new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset()))
|
||||
.map(boundary -> documentGraph.getTextBlock().subSequence(boundary).toString())
|
||||
.toList();
|
||||
List<String> valuesInRedactionLog = redactionLog.getRedactionLogEntry().stream().filter(e -> !e.isImage()).map(RedactionLogEntry::getValue).toList();
|
||||
@ -697,75 +674,6 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testManualResizeRedactionRemovesContainedEntities() {
|
||||
|
||||
String filePath = "files/new/crafted document.pdf";
|
||||
AnalyzeRequest request = uploadFileToStorage(filePath);
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
String testEntityValue1 = "Desiree";
|
||||
String testEntityValue2 = "Melanie";
|
||||
RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
|
||||
assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count());
|
||||
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
|
||||
String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum.";
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
RedactionEntity expandedEntity = entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().get();
|
||||
|
||||
String idToResize = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(testEntityValue1))
|
||||
.max(Comparator.comparingInt(RedactionLogEntry::getStartOffset))
|
||||
.get()
|
||||
.getId();
|
||||
List<Rectangle> resizedPositions = expandedEntity.getRedactionPositionsPerPage()
|
||||
.get(0)
|
||||
.getRectanglePerLine()
|
||||
.stream()
|
||||
.map(rectangle2D -> toAnnotationRectangle(rectangle2D, 3))
|
||||
.toList();
|
||||
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
|
||||
.annotationId(idToResize)
|
||||
.value(expandedEntityKeyword)
|
||||
.positions(resizedPositions)
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build();
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
manualRedactions.getResizeRedactions().add(manualResizeRedaction);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
String annotatedFileName = Paths.get(filePath).getFileName().toString().replace(".pdf", "_annotated2.pdf");
|
||||
File tmpFile = Paths.get(OsUtils.getTemporaryDirectory(), annotatedFileName).toFile();
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
RedactionLogEntry resizedEntry = redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().get();
|
||||
assertTrue(resizedEntry.getChanges().get(resizedEntry.getChanges().size() - 1).getType().equals(ChangeType.CHANGED));
|
||||
assertEquals(idToResize, resizedEntry.getId());
|
||||
assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
|
||||
assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.lastChangeIsRemoved()).count());
|
||||
}
|
||||
|
||||
|
||||
private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
|
||||
|
||||
return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle((float) rectangle2D.getMaxX(),
|
||||
(float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableRedactionWithCvTableService() throws IOException {
|
||||
|
||||
@ -967,71 +875,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
|
||||
/**
 * Smoke test for manual redactions on the "Single Table" minimal example.
 *
 * <p>Runs a full analysis with an initial set of manual redactions, then modifies the manual
 * redactions and re-analyzes, and finally writes the annotated PDF to the temporary directory.
 * The method contains no assertions; it only fails if one of the called services throws.
 *
 * @throws IOException declared for the file output below; other callees may throw it as well
 */
@Test
|
||||
public void testManualRedaction() throws IOException {
|
||||
|
||||
System.out.println("testManualRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
String pdfFile = "files/Minimal Examples/Single Table.pdf";
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
|
||||
String manualAddId = UUID.randomUUID().toString();
|
||||
|
||||
Comment comment = Comment.builder().date(OffsetDateTime.now()).user("TEST_USER").text("This is a comment test").build();
|
||||
// First pass: the removal request is DECLINED and a force redaction is registered.
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.DECLINED).build()));
|
||||
manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder()
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Something")
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build()));
|
||||
|
||||
// The same comment is attached to two fixed annotation ids and to the id of the manual entry built below.
manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment));
|
||||
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
|
||||
manualRedactions.getComments().put(manualAddId, List.of(comment));
|
||||
|
||||
// Prepared here, but only added to manualRedactions after the first analysis has run.
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
|
||||
manualRedactionEntry.setAnnotationId(manualAddId);
|
||||
manualRedactionEntry.setFileId("fileId");
|
||||
manualRedactionEntry.setStatus(AnnotationStatus.REQUESTED);
|
||||
manualRedactionEntry.setType("name");
|
||||
manualRedactionEntry.setValue("O'Loughlin C.K.");
|
||||
manualRedactionEntry.setReason("Manual Redaction");
|
||||
manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
|
||||
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
// Second pass: add the manual entry, replace the removal with an APPROVED one and add a legal basis change.
// NOTE(review): relies on request still holding this same ManualRedactions instance - confirm setManualRedactions does not copy.
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.APPROVED).build()));
|
||||
manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder()
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Manual Legal Basis Change")
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build())));
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
// NOTE(review): redactionLog is fetched but not used afterwards in this method.
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
|
||||
// Write the annotated document to "<temporary directory>/Annotated.pdf"; the stream is closed by try-with-resources.
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
// Elapsed wall-clock time in milliseconds for the whole test body.
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void phantomCellsDocumentTest() throws IOException {
|
||||
public void phantomCellsDocumentTest() {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/Phantom Cells.pdf");
|
||||
|
||||
|
||||
@ -180,9 +180,6 @@ public class AnnotationService {
|
||||
|
||||
private String createAnnotationContent(RedactionLogEntry redactionLogEntry) {
|
||||
|
||||
if (redactionLogEntry.isLocalManualRedaction()) {
|
||||
return "\nManual Redaction\n\nIn Section : \"" + redactionLogEntry.getSection() + "\"";
|
||||
}
|
||||
return redactionLogEntry.getType() + " \nRule " + redactionLogEntry.getMatchedRule() + " matched\n\n" + redactionLogEntry.getReason() + "\n\nLegal basis:" + redactionLogEntry.getLegalBasis() + "\n\nIn section: \"" + redactionLogEntry.getSection() + "\"";
|
||||
}
|
||||
|
||||
|
||||
@ -5,16 +5,16 @@ import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
public class RedactionEntityTest {
|
||||
public class TextEntityTest {
|
||||
|
||||
@Test
|
||||
public void testMatchedRule() {
|
||||
|
||||
RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);
|
||||
TextEntity entity = TextEntity.initialEntityNode(new TextRange(1, 100), "PII", EntityType.ENTITY);
|
||||
entity.skip("CBI.1.0", "");
|
||||
entity.skip("CBI.2.0", "");
|
||||
entity.skip("CBI.3.0", "");
|
||||
@ -25,24 +25,11 @@ public class RedactionEntityTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testMatchedRuleWithManualRedaction() {
|
||||
|
||||
RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);
|
||||
entity.skip("MAN.2.0", "");
|
||||
entity.skip("CBI.2.0", "");
|
||||
entity.skip("CBI.3.0", "");
|
||||
entity.skip("CBI.4.1", "");
|
||||
entity.skip("CBI.4.0", "");
|
||||
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("MAN.2.0");
|
||||
assertThat(entity.getMatchedRuleUnit()).isEqualTo(2);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testMatchedRuleWithNonsense() {
|
||||
|
||||
RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);
|
||||
TextEntity entity = TextEntity.initialEntityNode(new TextRange(1, 100), "PII", EntityType.ENTITY);
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
entity.skip("", "");
|
||||
});
|
||||
@ -1,85 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class BoundaryTest {
|
||||
|
||||
Boundary startBoundary;
|
||||
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
|
||||
startBoundary = new Boundary(10, 100);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testContains() {
|
||||
|
||||
assertTrue(startBoundary.contains(11));
|
||||
assertTrue(startBoundary.contains(50));
|
||||
assertFalse(startBoundary.contains(9));
|
||||
assertFalse(startBoundary.contains(100));
|
||||
assertFalse(startBoundary.contains(150));
|
||||
assertFalse(startBoundary.contains(-123));
|
||||
assertTrue(startBoundary.contains(new Boundary(11, 99)));
|
||||
assertTrue(startBoundary.contains(new Boundary(10, 100)));
|
||||
assertTrue(startBoundary.contains(new Boundary(11, 11)));
|
||||
assertFalse(startBoundary.contains(9, 100));
|
||||
assertTrue(startBoundary.contains(100, 100));
|
||||
assertFalse(startBoundary.contains(100, 101));
|
||||
assertFalse(startBoundary.contains(150, 151));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testIntersects() {
|
||||
|
||||
assertTrue(startBoundary.intersects(new Boundary(1, 11)));
|
||||
assertTrue(startBoundary.intersects(new Boundary(11, 12)));
|
||||
assertTrue(startBoundary.intersects(new Boundary(11, 100)));
|
||||
assertFalse(startBoundary.intersects(new Boundary(100, 101)));
|
||||
assertFalse(startBoundary.intersects(new Boundary(9, 10)));
|
||||
assertFalse(startBoundary.intersects(new Boundary(0, 1)));
|
||||
assertFalse(startBoundary.intersects(new Boundary(1000, 1001)));
|
||||
assertTrue(startBoundary.intersects(new Boundary(99, 101)));
|
||||
assertTrue(startBoundary.intersects(new Boundary(99, 101)));
|
||||
assertTrue(startBoundary.intersects(new Boundary(9, 101)));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSplit() {
|
||||
|
||||
assertEquals(4, startBoundary.split(List.of(12, 40, 90)).size());
|
||||
assertEquals(List.of(new Boundary(10, 12), new Boundary(12, 40), new Boundary(40, 90), new Boundary(90, 100)), startBoundary.split(List.of(12, 40, 90)));
|
||||
assertEquals(List.of(new Boundary(10, 40), new Boundary(40, 100)), startBoundary.split(List.of(40)));
|
||||
assertEquals(1, startBoundary.split(Collections.emptyList()).size());
|
||||
assertEquals(1, startBoundary.split(List.of(startBoundary.start())).size());
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(Collections.singletonList(0)));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(Collections.singletonList(100)));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(List.of(12, 40, 100)));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testCompareTo() {
|
||||
|
||||
Boundary beforeBoundary = new Boundary(1, 8);
|
||||
Boundary afterBoundary = new Boundary(101, 102);
|
||||
assertEquals(-1, beforeBoundary.compareTo(startBoundary));
|
||||
assertEquals(1, afterBoundary.compareTo(startBoundary));
|
||||
}
|
||||
|
||||
}
|
||||
@ -17,7 +17,7 @@ import org.mockito.MockitoAnnotations;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
|
||||
@ -67,22 +67,22 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
|
||||
Document document = buildGraph("files/new/crafted document.pdf");
|
||||
String type = "CBI_author";
|
||||
assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent());
|
||||
assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent());
|
||||
assertTrue(entityCreationService.byBoundary(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
|
||||
assertTrue(entityCreationService.byBoundary(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
|
||||
assertEquals(1, document.getEntities().size());
|
||||
verify(kieSession, times(1)).insert(any(RedactionEntity.class));
|
||||
verify(kieSession, times(1)).insert(any(TextEntity.class));
|
||||
}
|
||||
|
||||
|
||||
private RedactionEntity createAndInsertEntity(Document document, String searchTerm) {
|
||||
private TextEntity createAndInsertEntity(Document document, String searchTerm) {
|
||||
|
||||
int start = document.getTextBlock().indexOf(searchTerm);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY);
|
||||
entityCreationService.addEntityToGraph(redactionEntity, document);
|
||||
return redactionEntity;
|
||||
TextRange textRange = new TextRange(start, start + searchTerm.length());
|
||||
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY);
|
||||
entityCreationService.addEntityToGraph(textEntity, document);
|
||||
return textEntity;
|
||||
}
|
||||
|
||||
|
||||
@ -91,18 +91,18 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
String searchTerm = "Clarissa";
|
||||
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
|
||||
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
|
||||
|
||||
assertEquals("Expand to Hint ", redactionEntity.getTextBefore());
|
||||
assertEquals("’s Donut ←", redactionEntity.getTextAfter());
|
||||
assertEquals(searchTerm, redactionEntity.getValue());
|
||||
assertEquals("Expand to Hint ", textEntity.getTextBefore());
|
||||
assertEquals("’s Donut ←", textEntity.getTextAfter());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals("Rule 5: Do not redact genitive CBI_authors (Entries based on Dict) ",
|
||||
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(3, redactionEntity.getIntersectingNodes().size());
|
||||
assertEquals(5, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(Paragraph.class, redactionEntity.getDeepestFullyContainingNode());
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(3, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
}
|
||||
|
||||
|
||||
@ -111,17 +111,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
String searchTerm = "Rule 39:";
|
||||
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
|
||||
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
|
||||
|
||||
assertEquals("", redactionEntity.getTextBefore());
|
||||
assertEquals(" Purity Hint", redactionEntity.getTextAfter());
|
||||
assertEquals(searchTerm, redactionEntity.getValue());
|
||||
assertEquals("Rule 39: Purity Hint ", redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(3, redactionEntity.getIntersectingNodes().size());
|
||||
assertEquals(6, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(Headline.class, redactionEntity.getDeepestFullyContainingNode());
|
||||
assertEquals("", textEntity.getTextBefore());
|
||||
assertEquals(" Purity Hint", textEntity.getTextAfter());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals("Rule 39: Purity Hint ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(3, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(6, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
}
|
||||
|
||||
|
||||
@ -130,17 +130,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
String searchTerm = "1998";
|
||||
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
|
||||
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
|
||||
|
||||
assertEquals("", redactionEntity.getTextBefore());
|
||||
assertEquals("", redactionEntity.getTextAfter());
|
||||
assertEquals(searchTerm, redactionEntity.getValue());
|
||||
assertEquals("Rule 6-11 (Authors Table) ", redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(5, redactionEntity.getIntersectingNodes().size());
|
||||
assertEquals(15, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(TableCell.class, redactionEntity.getDeepestFullyContainingNode());
|
||||
assertEquals("", textEntity.getTextBefore());
|
||||
assertEquals("", textEntity.getTextAfter());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals("Rule 6-11 (Authors Table) ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(5, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(15, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
}
|
||||
|
||||
|
||||
@ -212,19 +212,19 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
|
||||
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
String searchTerm = "Cucurbit";
|
||||
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
|
||||
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
|
||||
|
||||
assertEquals("except Cranberry; Vegetable, ", redactionEntity.getTextBefore());
|
||||
assertEquals(", Group 9;", redactionEntity.getTextAfter());
|
||||
assertEquals("except Cranberry; Vegetable, ", textEntity.getTextBefore());
|
||||
assertEquals(", Group 9;", textEntity.getTextAfter());
|
||||
assertEquals("1.1.4 Evaluations carried out under other regulatory contexts ",
|
||||
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(searchTerm, redactionEntity.getValue());
|
||||
assertEquals(3, redactionEntity.getIntersectingNodes().size());
|
||||
assertEquals(5, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
|
||||
assertInstanceOf(Paragraph.class, redactionEntity.getDeepestFullyContainingNode());
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals(3, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
|
||||
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
}
|
||||
|
||||
|
||||
@ -238,21 +238,21 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
start = document.getTextBlock().indexOf(searchTerm, start + 1);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY);
|
||||
entityCreationService.addEntityToGraph(redactionEntity, document);
|
||||
TextRange textRange = new TextRange(start, start + searchTerm.length());
|
||||
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY);
|
||||
entityCreationService.addEntityToGraph(textEntity, document);
|
||||
|
||||
assertEquals("2.6.1 Summary of ", redactionEntity.getTextBefore());
|
||||
assertEquals(" and excretion in", redactionEntity.getTextAfter());
|
||||
assertEquals("2.6.1 Summary of ", textEntity.getTextBefore());
|
||||
assertEquals(" and excretion in", textEntity.getTextAfter());
|
||||
assertEquals("2.6.1 Summary of absorption, distribution, metabolism and excretion in mammals ",
|
||||
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(searchTerm, redactionEntity.getValue());
|
||||
assertEquals(3, redactionEntity.getIntersectingNodes().size());
|
||||
assertEquals(4, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
|
||||
assertInstanceOf(Headline.class, redactionEntity.getDeepestFullyContainingNode());
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals(3, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(4, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
|
||||
assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
}
|
||||
|
||||
|
||||
@ -261,32 +261,32 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
|
||||
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
String searchTerm = "N-deacetylation product";
|
||||
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
|
||||
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
|
||||
|
||||
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", redactionEntity.getTextBefore());
|
||||
assertEquals(" of metabolite of", redactionEntity.getTextAfter());
|
||||
assertEquals(searchTerm, redactionEntity.getValue());
|
||||
assertEquals(4, redactionEntity.getIntersectingNodes().size());
|
||||
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", textEntity.getTextBefore());
|
||||
assertEquals(" of metabolite of", textEntity.getTextAfter());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals(4, textEntity.getIntersectingNodes().size());
|
||||
assertEquals("Table 2.7-1: List of substances and metabolites and related structural formula ",
|
||||
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54));
|
||||
assertEquals(26, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54));
|
||||
assertEquals(26, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
|
||||
assertInstanceOf(TableCell.class, redactionEntity.getDeepestFullyContainingNode());
|
||||
assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
}
|
||||
|
||||
|
||||
// This might fail if an entity with the same name exists twice in the deepest containing node.
|
||||
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, RedactionEntity redactionEntity) {
|
||||
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, TextEntity textEntity) {
|
||||
|
||||
List<Integer> paragraphStart = redactionEntity.getIntersectingNodes().stream()//
|
||||
List<Integer> paragraphStart = textEntity.getIntersectingNodes().stream()//
|
||||
.map(SemanticNode::getTextBlock)//
|
||||
.map(textBlock -> textBlock.indexOf(searchTerm, redactionEntity.getDeepestFullyContainingNode().getBoundary().start()))//
|
||||
.map(textBlock -> textBlock.indexOf(searchTerm, textEntity.getDeepestFullyContainingNode().getTextRange().start()))//
|
||||
.toList();
|
||||
|
||||
paragraphStart.forEach(nodeStart -> assertEquals(redactionEntity.getBoundary().start(), nodeStart));
|
||||
paragraphStart.forEach(nodeStart -> assertEquals(textEntity.getTextRange().start(), nodeStart));
|
||||
}
|
||||
|
||||
|
||||
@ -296,17 +296,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY);
|
||||
entityCreationService.addEntityToGraph(redactionEntity, document);
|
||||
TextRange textRange = new TextRange(start, start + searchTerm.length());
|
||||
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY);
|
||||
entityCreationService.addEntityToGraph(textEntity, document);
|
||||
Page pageNode = document.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();
|
||||
|
||||
assertEquals(redactionEntity.getValue(), searchTerm);
|
||||
assertTrue(pageNode.getEntities().contains(redactionEntity));
|
||||
assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(redactionEntity)));
|
||||
assertTrue(redactionEntity.getPages().contains(pageNode));
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
|
||||
assertTrue(redactionEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(redactionEntity)));
|
||||
assertEquals(textEntity.getValue(), searchTerm);
|
||||
assertTrue(pageNode.getEntities().contains(textEntity));
|
||||
assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(textEntity)));
|
||||
assertTrue(textEntity.getPages().contains(pageNode));
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
assertTrue(textEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(textEntity)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -33,7 +33,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
|
||||
@ -136,7 +136,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID);
|
||||
|
||||
long dictionarySearchStart = System.currentTimeMillis();
|
||||
List<RedactionEntity> foundEntities = new LinkedList<>();
|
||||
List<TextEntity> foundEntities = new LinkedList<>();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
findEntitiesWithSearchImplementation(document, model.getEntriesSearch(), EntityType.ENTITY, foundEntities, model.getType());
|
||||
findEntitiesWithSearchImplementation(document, model.getFalsePositiveSearch(), EntityType.FALSE_POSITIVE, foundEntities, model.getType());
|
||||
@ -210,7 +210,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
float totalSearchTime = 0;
|
||||
float totalGraphTime = 0;
|
||||
float totalInsertTime = 0;
|
||||
List<RedactionEntity> foundEntities = new LinkedList<>();
|
||||
List<TextEntity> foundEntities = new LinkedList<>();
|
||||
for (int i = 0; i < numberOfRuns; i++) {
|
||||
foundEntities = new LinkedList<>();
|
||||
|
||||
@ -256,9 +256,9 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
for (Page page : document.getPages()) {
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.isRemoved())
|
||||
.filter(RedactionEntity::isApplied)
|
||||
.flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream())
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(TextEntity::applied)
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
|
||||
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
||||
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
|
||||
.toList();
|
||||
@ -270,9 +270,9 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
for (Page page : document.getPages()) {
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.isRemoved())
|
||||
.filter(entityNode -> !entityNode.isApplied())
|
||||
.flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream())
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(entityNode -> !entityNode.applied())
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
|
||||
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
||||
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
|
||||
.toList();
|
||||
@ -289,14 +289,14 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
|
||||
private void findEntitiesWithSearchImplementation(Document document,
|
||||
SearchImplementation searchImplementation,
|
||||
EntityType entityType,
|
||||
List<RedactionEntity> foundEntities,
|
||||
List<TextEntity> foundEntities,
|
||||
String type) {
|
||||
|
||||
TextBlock textBlock = document.getTextBlock();
|
||||
searchImplementation.getBoundaries(textBlock, textBlock.getBoundary())
|
||||
searchImplementation.getBoundaries(textBlock, textBlock.getTextRange())
|
||||
.stream()
|
||||
.filter(boundary -> boundaryIsSurroundedBySeparators(textBlock, boundary))
|
||||
.map(bounds -> RedactionEntity.initialEntityNode(bounds, type, entityType))
|
||||
.map(bounds -> TextEntity.initialEntityNode(bounds, type, entityType))
|
||||
.forEach(foundEntities::add);
|
||||
}
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
@ -27,7 +28,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo
|
||||
import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
@ -88,9 +89,9 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
// IMPORTANT: always use the graph which is mapped from the DocumentData, since rounding errors occur during storage.
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(request.getDossierId(), request.getFileId()));
|
||||
List<EntityIdentifier> notFoundManualRedactionEntries = redactionLogAdapter.toRedactionEntity(originalRedactionLog, document);
|
||||
List<ManualEntity> notFoundManualRedactionEntries = redactionLogAdapter.toRedactionEntity(originalRedactionLog, document);
|
||||
|
||||
var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries);
|
||||
var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries, Collections.emptyMap());
|
||||
|
||||
Map<String, RedactionLogEntry> migratedIds = migratedRedactionLogEntries.stream().collect(toMap(RedactionLogEntry::getId, Functions.identity()));
|
||||
Map<String, RedactionLogEntry> newIds = newRedactionLog.getRedactionLogEntry().stream().collect(toMap(RedactionLogEntry::getId, Functions.identity()));
|
||||
|
||||
@ -8,7 +8,7 @@ import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
@ -27,7 +27,7 @@ public class SearchImplementationTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
SearchImplementation searchImplementation = new SearchImplementation(List.of("mydossierredaction"), true);
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
List<RedactionEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList();
|
||||
List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList();
|
||||
assertEquals(2, entities.size());
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,85 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class TextRangeTest {
|
||||
|
||||
TextRange startTextRange;
|
||||
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
|
||||
startTextRange = new TextRange(10, 100);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testContains() {
|
||||
|
||||
assertTrue(startTextRange.contains(11));
|
||||
assertTrue(startTextRange.contains(50));
|
||||
assertFalse(startTextRange.contains(9));
|
||||
assertFalse(startTextRange.contains(100));
|
||||
assertFalse(startTextRange.contains(150));
|
||||
assertFalse(startTextRange.contains(-123));
|
||||
assertTrue(startTextRange.contains(new TextRange(11, 99)));
|
||||
assertTrue(startTextRange.contains(new TextRange(10, 100)));
|
||||
assertTrue(startTextRange.contains(new TextRange(11, 11)));
|
||||
assertFalse(startTextRange.contains(9, 100));
|
||||
assertTrue(startTextRange.contains(100, 100));
|
||||
assertFalse(startTextRange.contains(100, 101));
|
||||
assertFalse(startTextRange.contains(150, 151));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testIntersects() {
|
||||
|
||||
assertTrue(startTextRange.intersects(new TextRange(1, 11)));
|
||||
assertTrue(startTextRange.intersects(new TextRange(11, 12)));
|
||||
assertTrue(startTextRange.intersects(new TextRange(11, 100)));
|
||||
assertFalse(startTextRange.intersects(new TextRange(100, 101)));
|
||||
assertFalse(startTextRange.intersects(new TextRange(9, 10)));
|
||||
assertFalse(startTextRange.intersects(new TextRange(0, 1)));
|
||||
assertFalse(startTextRange.intersects(new TextRange(1000, 1001)));
|
||||
assertTrue(startTextRange.intersects(new TextRange(99, 101)));
|
||||
assertTrue(startTextRange.intersects(new TextRange(99, 101)));
|
||||
assertTrue(startTextRange.intersects(new TextRange(9, 101)));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSplit() {
|
||||
|
||||
assertEquals(4, startTextRange.split(List.of(12, 40, 90)).size());
|
||||
assertEquals(List.of(new TextRange(10, 12), new TextRange(12, 40), new TextRange(40, 90), new TextRange(90, 100)), startTextRange.split(List.of(12, 40, 90)));
|
||||
assertEquals(List.of(new TextRange(10, 40), new TextRange(40, 100)), startTextRange.split(List.of(40)));
|
||||
assertEquals(1, startTextRange.split(Collections.emptyList()).size());
|
||||
assertEquals(1, startTextRange.split(List.of(startTextRange.start())).size());
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(Collections.singletonList(0)));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(Collections.singletonList(100)));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(List.of(12, 40, 100)));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testCompareTo() {
|
||||
|
||||
TextRange beforeTextRange = new TextRange(1, 8);
|
||||
TextRange afterTextRange = new TextRange(101, 102);
|
||||
assertEquals(-1, beforeTextRange.compareTo(startTextRange));
|
||||
assertEquals(1, afterTextRange.compareTo(startTextRange));
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
package com.iqser.red.service.redaction.v1.server.manualchanges;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.mockito.Mockito.when;
|
||||
@ -6,6 +6,7 @@ import static org.wildfly.common.Assert.assertFalse;
|
||||
import static org.wildfly.common.Assert.assertTrue;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
@ -19,19 +20,20 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
|
||||
public class CustomEntityCreationAdapterTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
private EntityEnrichmentService entityEnrichmentService;
|
||||
@ -48,7 +50,7 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
@BeforeEach
|
||||
public void stubMethods() {
|
||||
|
||||
|
||||
MockitoAnnotations.openMocks(this);
|
||||
when(dictionaryService.getColor(DICTIONARY_AUTHOR, TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new float[]{0f, 0f, 0f});
|
||||
}
|
||||
@ -61,10 +63,10 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
|
||||
Document document = buildGraph("files/new/VV-919901.pdf");
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
|
||||
List<RedactionEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList();
|
||||
List<TextEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList();
|
||||
assertFalse(tempEntities.isEmpty());
|
||||
var tempEntity = tempEntities.get(0);
|
||||
List<Rectangle> positions = tempEntity.getRedactionPositionsPerPage()
|
||||
List<Rectangle> positions = tempEntity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.flatMap(redactionPosition -> redactionPosition.getRectanglePerLine()
|
||||
.stream()
|
||||
@ -86,9 +88,8 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
|
||||
tempEntity.removeFromGraph();
|
||||
assertTrue(document.getEntities().isEmpty());
|
||||
|
||||
List<EntityIdentifier> notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry),
|
||||
document);
|
||||
assertTrue(notFoundEntityIdentifiers.isEmpty());
|
||||
List<ManualEntity> notFoundManualEntities = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), document);
|
||||
assertTrue(notFoundManualEntities.isEmpty());
|
||||
assertEquals(1, document.getEntities().size());
|
||||
}
|
||||
|
||||
@ -115,12 +116,14 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
assertTrue(document.getEntities().isEmpty());
|
||||
|
||||
List<EntityIdentifier> notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry),
|
||||
document);
|
||||
assertEquals(1, notFoundEntityIdentifiers.size());
|
||||
List<ManualEntity> notFoundManualEntities = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), document);
|
||||
assertEquals(1, notFoundManualEntities.size());
|
||||
assertTrue(document.getEntities().isEmpty());
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundEntityIdentifiers);
|
||||
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document,
|
||||
TEST_DOSSIER_TEMPLATE_ID,
|
||||
notFoundManualEntities,
|
||||
Collections.emptyMap());
|
||||
|
||||
assertEquals(1, redactionLogEntries.size());
|
||||
assertEquals(value, redactionLogEntries.get(0).getValue());
|
||||
@ -0,0 +1,323 @@
|
||||
package com.iqser.red.service.redaction.v1.server.manualchanges;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.wildfly.common.Assert.assertTrue;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.FilterType;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.AbstractRedactionIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.Application;
|
||||
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(ManualChangesEnd2EndTest.TestConfiguration.class)
|
||||
public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/rules.drl");
|
||||
@Autowired
|
||||
private EntityEnrichmentService entityEnrichmentService;
|
||||
|
||||
private EntityCreationService entityCreationService;
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
@Import(LayoutParsingServiceProcessorConfiguration.class)
|
||||
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void createServices() {
|
||||
|
||||
entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void stubClients() {
|
||||
|
||||
TenantContext.setTenantId("redaction");
|
||||
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
|
||||
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
loadNerForTest();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testManualResizeRedactionRemovesContainedEntities() {
|
||||
|
||||
String filePath = "files/new/crafted document.pdf";
|
||||
AnalyzeRequest request = uploadFileToStorage(filePath);
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
String testEntityValue1 = "Desiree";
|
||||
String testEntityValue2 = "Melanie";
|
||||
RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
|
||||
assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count());
|
||||
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
|
||||
String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum.";
|
||||
TextEntity expandedEntity = entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().get();
|
||||
|
||||
String idToResize = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().equals(testEntityValue1))
|
||||
.max(Comparator.comparingInt(RedactionLogEntry::getStartOffset))
|
||||
.get()
|
||||
.getId();
|
||||
List<Rectangle> resizedPositions = expandedEntity.getPositionsOnPagePerPage()
|
||||
.get(0)
|
||||
.getRectanglePerLine()
|
||||
.stream()
|
||||
.map(rectangle2D -> toAnnotationRectangle(rectangle2D, 3))
|
||||
.toList();
|
||||
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
|
||||
.annotationId(idToResize)
|
||||
.value(expandedEntityKeyword)
|
||||
.positions(resizedPositions)
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build();
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
manualRedactions.getResizeRedactions().add(manualResizeRedaction);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
String annotatedFileName = Paths.get(filePath).getFileName().toString().replace(".pdf", "_annotated2.pdf");
|
||||
File tmpFile = Paths.get(OsUtils.getTemporaryDirectory(), annotatedFileName).toFile();
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
RedactionLogEntry resizedEntry = redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().get();
|
||||
assertTrue(resizedEntry.getChanges().get(resizedEntry.getChanges().size() - 1).getType().equals(ChangeType.CHANGED));
|
||||
assertEquals(idToResize, resizedEntry.getId());
|
||||
assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
|
||||
assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.lastChangeIsRemoved()).count());
|
||||
}
|
||||
|
||||
|
||||
private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {
|
||||
|
||||
return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle((float) rectangle2D.getMaxX(),
|
||||
(float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testManualRedaction() throws IOException {
|
||||
|
||||
System.out.println("testManualRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
String pdfFile = "files/Minimal Examples/Single Table.pdf";
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
|
||||
String manualAddId = UUID.randomUUID().toString();
|
||||
|
||||
Comment comment = Comment.builder().date(OffsetDateTime.now()).user("TEST_USER").text("This is a comment test").build();
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.DECLINED).build()));
|
||||
manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder()
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Something")
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build()));
|
||||
|
||||
manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment));
|
||||
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
|
||||
manualRedactions.getComments().put(manualAddId, List.of(comment));
|
||||
|
||||
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
|
||||
manualRedactionEntry.setAnnotationId(manualAddId);
|
||||
manualRedactionEntry.setFileId("fileId");
|
||||
manualRedactionEntry.setStatus(AnnotationStatus.REQUESTED);
|
||||
manualRedactionEntry.setType("name");
|
||||
manualRedactionEntry.setValue("O'Loughlin C.K.");
|
||||
manualRedactionEntry.setReason("Manual Redaction");
|
||||
manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
|
||||
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").status(AnnotationStatus.APPROVED).build()));
|
||||
manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder()
|
||||
.annotationId("675eba69b0c2917de55462c817adaa05")
|
||||
.fileId("fileId")
|
||||
.legalBasis("Manual Legal Basis Change")
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build())));
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testReCategorizeToVertebrateChangesCbiAuthor() {
|
||||
|
||||
String filePath = "files/new/crafted document.pdf";
|
||||
AnalyzeRequest request = uploadFileToStorage(filePath);
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
var oxfordUniversityPress = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("published_information"))
|
||||
.filter(entry -> entry.getValue().equals("Oxford University Press"))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
var asyaLyon = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("CBI_author"))
|
||||
.filter(entry -> entry.getValue().equals("Asya Lyon"))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
assertEquals("CBI.3.2", asyaLyon.getMatchedRule());
|
||||
assertEquals("No vertebrate found", asyaLyon.getReason());
|
||||
|
||||
ManualImageRecategorization recategorization = ManualImageRecategorization.builder()
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.type("vertebrate")
|
||||
.annotationId(oxfordUniversityPress.getId())
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build();
|
||||
|
||||
request.setManualRedactions(new ManualRedactions());
|
||||
request.getManualRedactions().setImageRecategorization(Set.of(recategorization));
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
RedactionLog redactionLog2 = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
assertFalse(redactionLog2.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("published_information"))
|
||||
.anyMatch(entry -> entry.getValue().equals("Oxford University Press")));
|
||||
|
||||
var oxfordUniversityPressRecategorized = redactionLog2.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("vertebrate"))
|
||||
.filter(entry -> entry.getValue().equals("Oxford University Press"))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
assertEquals(1, oxfordUniversityPressRecategorized.getManualChanges().size());
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
package com.iqser.red.service.redaction.v1.server.manualchanges;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
@ -31,24 +31,25 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
|
||||
@Import(ManualResizeRedactionIntegrationTest.TestConfiguration.class)
|
||||
public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
@Import(ManualChangesIntegrationTest.TestConfiguration.class)
|
||||
public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
private static final String RULES = "drools/manual_redaction_rules.drl";
|
||||
|
||||
@Autowired
|
||||
private EntityEnrichmentService entityEnrichmentService;
|
||||
private EntityCreationService entityCreationService;
|
||||
private ManualRedactionApplicationService manualRedactionApplicationService;
|
||||
private ManualChangesApplicationService manualChangesApplicationService;
|
||||
|
||||
@Qualifier("kieContainer")
|
||||
@Autowired
|
||||
@ -79,7 +80,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
public void createServices() {
|
||||
|
||||
entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
manualRedactionApplicationService = new ManualRedactionApplicationService(entityCreationService);
|
||||
manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService);
|
||||
}
|
||||
|
||||
|
||||
@ -87,23 +88,23 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
public void manualResizeRedactionTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<RedactionEntity> biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document)
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
RedactionEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
|
||||
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
|
||||
.annotationId(initialId)
|
||||
.value(biggerEntity.getValue())
|
||||
.positions(toAnnotationRectangles(biggerEntity.getRedactionPositionsPerPage().get(0)))
|
||||
.positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage().get(0)))
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build();
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.insert(entity);
|
||||
@ -111,14 +112,14 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
|
||||
assertEquals(biggerEntity.getBoundary(), entity.getBoundary());
|
||||
assertEquals(biggerEntity.getTextRange(), entity.getTextRange());
|
||||
assertEquals(biggerEntity.getDeepestFullyContainingNode(), entity.getDeepestFullyContainingNode());
|
||||
assertEquals(biggerEntity.getIntersectingNodes(), entity.getIntersectingNodes());
|
||||
assertEquals(biggerEntity.getPages(), entity.getPages());
|
||||
assertEquals(biggerEntity.getValue(), entity.getValue());
|
||||
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
|
||||
assertRectanglesAlmostEqual(biggerEntity.getRedactionPositionsPerPage().get(0).getRectanglePerLine(), entity.getRedactionPositionsPerPage().get(0).getRectanglePerLine());
|
||||
assertTrue(entity.isResized());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage().get(0).getRectanglePerLine(), entity.getPositionsOnPagePerPage().get(0).getRectanglePerLine());
|
||||
assertTrue(entity.resized());
|
||||
}
|
||||
|
||||
|
||||
@ -126,11 +127,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
public void manualForceRedactionTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
|
||||
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder()
|
||||
.annotationId(initialId)
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
@ -139,7 +140,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
.build();
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.insert(entity);
|
||||
kieSession.insert(manualForceRedaction);
|
||||
kieSession.insert(document);
|
||||
@ -151,11 +152,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
assertEquals(1, entity.getPages().size());
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals("Something", entity.getMatchedRule().getLegalBasis());
|
||||
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
|
||||
assertFalse(entity.isRemoved());
|
||||
assertTrue(entity.isSkipRemoveEntitiesContainedInLarger());
|
||||
assertTrue(entity.isApplied());
|
||||
assertEquals("Something", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertFalse(entity.removed());
|
||||
assertTrue(entity.hasManualChanges());
|
||||
assertTrue(entity.applied());
|
||||
}
|
||||
|
||||
|
||||
@ -163,15 +164,15 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
public void manualIDRemovalTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
|
||||
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build();
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.insert(entity);
|
||||
@ -180,8 +181,8 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
kieSession.dispose();
|
||||
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
|
||||
assertTrue(entity.isIgnored());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertTrue(entity.ignored());
|
||||
}
|
||||
|
||||
|
||||
@ -189,11 +190,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
public void manualIDRemovalButAlsoForceRedactionTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
|
||||
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build();
|
||||
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder()
|
||||
.annotationId(initialId)
|
||||
@ -203,7 +204,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
.build();
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.insert(entity);
|
||||
@ -216,9 +217,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
assertEquals(1, entity.getPages().size());
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
|
||||
assertFalse(entity.isRemoved());
|
||||
assertFalse(entity.isIgnored());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertFalse(entity.removed());
|
||||
assertFalse(entity.ignored());
|
||||
}
|
||||
|
||||
|
||||
@ -226,15 +227,15 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
public void manualIDRemovalNotApprovedTest() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document");
|
||||
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
|
||||
|
||||
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.REQUESTED).build();
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.insert(entity);
|
||||
kieSession.insert(idRemoval);
|
||||
kieSession.insert(document);
|
||||
@ -246,8 +247,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
assertEquals(1, entity.getPages().size());
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
|
||||
assertFalse(entity.isRemoved());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
assertFalse(entity.ignored());
|
||||
assertFalse(entity.removed());
|
||||
}
|
||||
|
||||
|
||||
@ -271,9 +273,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
}
|
||||
|
||||
|
||||
private static List<Rectangle> toAnnotationRectangles(RedactionPosition redactionPositions) {
|
||||
private static List<Rectangle> toAnnotationRectangles(PositionOnPage positionsOnPage) {
|
||||
|
||||
return redactionPositions.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, redactionPositions.getPage().getNumber())).toList();
|
||||
return positionsOnPage.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())).toList();
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,134 @@
|
||||
package com.iqser.red.service.redaction.v1.server.manualchanges;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
|
||||
public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
private EntityEnrichmentService entityEnrichmentService;
|
||||
|
||||
private EntityCreationService entityCreationService;
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void createServices() {
|
||||
|
||||
entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testBasicOverrides() {
|
||||
|
||||
OffsetDateTime start = OffsetDateTime.now();
|
||||
String reason = "whatever";
|
||||
Document document = buildGraphNoImages("files/new/crafted document.pdf");
|
||||
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document).peek(e -> e.apply("T.0.0", reason)).toList();
|
||||
assertFalse(entities.isEmpty());
|
||||
TextEntity entity = entities.get(0);
|
||||
assertTrue(entity.active());
|
||||
assertTrue(entity.applied());
|
||||
assertFalse(entity.removed());
|
||||
assertFalse(entity.resized());
|
||||
assertFalse(entity.ignored());
|
||||
assertEquals("n-a", entity.getMatchedRule().getLegalBasis());
|
||||
String annotationId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
|
||||
// remove first
|
||||
IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).status(AnnotationStatus.APPROVED).build();
|
||||
entity.getManualOverwrite().addChange(removal);
|
||||
assertTrue(entity.ignored());
|
||||
assertFalse(entity.applied());
|
||||
assertEquals(reason + ", removed by manual override", entity.buildReasonWithManualChangeDescriptions());
|
||||
|
||||
// force again
|
||||
ManualForceRedaction forceRedaction = ManualForceRedaction.builder()
|
||||
.requestDate(start.plusSeconds(1))
|
||||
.fileId(TEST_FILE_ID)
|
||||
.annotationId(annotationId)
|
||||
.legalBasis("coolio")
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build();
|
||||
entity.getManualOverwrite().addChange(forceRedaction);
|
||||
assertTrue(entity.applied());
|
||||
assertFalse(entity.ignored());
|
||||
assertFalse(entity.removed());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions());
|
||||
assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
|
||||
// remove again
|
||||
IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).status(AnnotationStatus.APPROVED).build();
|
||||
entity.getManualOverwrite().addChange(removal2);
|
||||
assertTrue(entity.ignored());
|
||||
assertFalse(entity.applied());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override, removed by manual override", entity.buildReasonWithManualChangeDescriptions());
|
||||
|
||||
// force again, with requestDate before removal2, but after force1
|
||||
ManualForceRedaction forceRedaction2 = ManualForceRedaction.builder()
|
||||
.requestDate(start.plusSeconds(2))
|
||||
.fileId(TEST_FILE_ID)
|
||||
.annotationId(annotationId)
|
||||
.legalBasis("coolio")
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build();
|
||||
entity.getManualOverwrite().addChange(forceRedaction2);
|
||||
assertTrue(entity.ignored());
|
||||
assertFalse(entity.applied());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override",
|
||||
entity.buildReasonWithManualChangeDescriptions());
|
||||
|
||||
String legalBasis = "Yeah";
|
||||
String section = "Some random section!";
|
||||
String value = "Some random value!";
|
||||
ManualLegalBasisChange legalBasisChange = ManualLegalBasisChange.builder()
|
||||
.legalBasis(legalBasis)
|
||||
.annotationId(annotationId)
|
||||
.requestDate(start.plusSeconds(4))
|
||||
.section(section)
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.user("peter")
|
||||
.value(value)
|
||||
.build();
|
||||
entity.getManualOverwrite().addChange(legalBasisChange);
|
||||
assertTrue(entity.ignored());
|
||||
assertFalse(entity.applied());
|
||||
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override, legal basis was manually changed",
|
||||
entity.buildReasonWithManualChangeDescriptions());
|
||||
assertEquals(value, entity.getManualOverwrite().getValue().orElse(entity.getValue()));
|
||||
assertEquals(legalBasis, entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
|
||||
assertEquals(section, entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString()));
|
||||
|
||||
ManualImageRecategorization imageRecategorizationRequest = ManualImageRecategorization.builder()
|
||||
.type("type")
|
||||
.requestDate(start.plusSeconds(5))
|
||||
.annotationId(annotationId)
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.build();
|
||||
entity.getManualOverwrite().addChange(imageRecategorizationRequest);
|
||||
assertTrue(entity.getManualOverwrite().getRecategorized().isPresent());
|
||||
assertTrue(entity.getManualOverwrite().getRecategorized().get());
|
||||
assertEquals("type", entity.getManualOverwrite().getType().orElse(entity.getType()));
|
||||
}
|
||||
|
||||
}
|
||||
@ -5,9 +5,6 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
@ -15,7 +12,6 @@ import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.MessageType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -25,9 +21,6 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
|
||||
|
||||
public static final String FILE_NAME = "test-file";
|
||||
|
||||
@Autowired
|
||||
private AnnotationService annotationService;
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
@ -57,26 +50,15 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
|
||||
.fileAttributes(List.of())
|
||||
.build();
|
||||
|
||||
try {
|
||||
var text = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".text.json").getInputStream();
|
||||
var sectionText = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".section-grid.json").getInputStream();
|
||||
redactionStorageService.storeObject("dossierId", "fileId", FileType.TEXT, text);
|
||||
redactionStorageService.storeObject("dossierId", "fileId", FileType.SECTION_GRID, sectionText);
|
||||
} catch (Exception e) {
|
||||
log.info("No text file provided, Performing Structure analysis");
|
||||
|
||||
ar.setMessageType(MessageType.STRUCTURE_ANALYSE);
|
||||
redactionMessageReceiver.receiveAnalyzeRequest(ar, false);
|
||||
}
|
||||
|
||||
try {
|
||||
var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream();
|
||||
} catch (Exception e) {
|
||||
log.info("No redaction log provided, Performing full analysis");
|
||||
|
||||
ar.setMessageType(MessageType.ANALYSE);
|
||||
redactionMessageReceiver.receiveAnalyzeRequest(ar, false);
|
||||
}
|
||||
// try {
|
||||
// var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream();
|
||||
// } catch (Exception e) {
|
||||
// log.info("No redaction log provided, Performing full analysis");
|
||||
//
|
||||
// ar.setMessageType(MessageType.ANALYSE);
|
||||
// redactionMessageReceiver.receiveAnalyzeRequest(ar, false);
|
||||
// }
|
||||
|
||||
simulateIncrement(List.of("Desiree"), "PII", 3L);
|
||||
ar.setMessageType(MessageType.REANALYSE);
|
||||
|
||||
@ -22,10 +22,10 @@ import org.springframework.core.io.ClassPathResource;
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
|
||||
@ -66,7 +66,7 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
Document document = buildGraphNoImages(filePath);
|
||||
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(parseNerEntities(nerEntitiesFilePath), document);
|
||||
assertFalse(entityRecognitionEntities.isEmpty());
|
||||
assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.boundary().start() < entity.boundary().end()));
|
||||
assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
|
||||
|
||||
ClassPathResource resource = new ClassPathResource(filePath);
|
||||
try (PDDocument pdDocument = Loader.loadPDF(resource.getInputStream())) {
|
||||
@ -75,8 +75,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
.getNerEntityList()
|
||||
.stream()
|
||||
.filter(e -> !e.type().equals("CBI_author"));
|
||||
List<RedactionEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
|
||||
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
|
||||
List<TextEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
|
||||
.map(e -> entityCreationService.byBoundary(e.textRange(), e.type(), EntityType.ENTITY, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
@ -107,23 +107,23 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
log.info("Parsed NerEntitiesModel");
|
||||
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
|
||||
log.info("Validated and mapped");
|
||||
List<Boundary> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList();
|
||||
List<TextRange> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList();
|
||||
log.info("Combined to CBI_address");
|
||||
List<RedactionEntity> cbiAddressEntities = nerEntityBoundaries.stream()
|
||||
List<TextEntity> cbiAddressEntities = nerEntityBoundaries.stream()
|
||||
.map(b -> entityCreationService.byBoundary(b, "CBI_address", EntityType.RECOMMENDATION, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
assertFalse(cbiAddressEntities.isEmpty());
|
||||
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getBoundary().start() < entity.getBoundary().end()));
|
||||
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getTextRange().start() < entity.getTextRange().end()));
|
||||
|
||||
ClassPathResource resource = new ClassPathResource(filePath);
|
||||
try (PDDocument pdDocument = Loader.loadPDF(resource.getInputStream())) {
|
||||
|
||||
List<RedactionEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
|
||||
List<TextEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
|
||||
.getNerEntityList()
|
||||
.stream()
|
||||
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
|
||||
.map(e -> entityCreationService.byBoundary(e.textRange(), e.type(), EntityType.ENTITY, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
@ -153,24 +153,24 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
private List<Rectangle2D> getPositionsFromEntities(Stream<RedactionEntity> entities) {
|
||||
private List<Rectangle2D> getPositionsFromEntities(Stream<TextEntity> entities) {
|
||||
|
||||
return entities.map(RedactionEntity::getRedactionPositionsPerPage)
|
||||
return entities.map(TextEntity::getPositionsOnPagePerPage)
|
||||
.flatMap(Collection::stream)
|
||||
.map(RedactionPosition::getRectanglePerLine)
|
||||
.map(PositionOnPage::getRectanglePerLine)
|
||||
.flatMap(Collection::stream)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private List<Rectangle2D> getPositionsFromEntityOfType(String type, List<RedactionEntity> entities) {
|
||||
private List<Rectangle2D> getPositionsFromEntityOfType(String type, List<TextEntity> entities) {
|
||||
|
||||
return getPositionsFromEntities(entities.stream().filter(e -> e.getType().equals(type)));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<Rectangle2D> getPositionsFromEntityNotOfType(List<String> types, List<RedactionEntity> entities) {
|
||||
private List<Rectangle2D> getPositionsFromEntityNotOfType(List<String> types, List<TextEntity> entities) {
|
||||
|
||||
return getPositionsFromEntities(entities.stream().filter(e -> types.stream().noneMatch(type -> e.getType().equals(type))));
|
||||
|
||||
|
||||
@ -43,6 +43,19 @@ class DroolsExecutionServiceTest {
|
||||
assertTrue(droolsSyntaxValidation.isCompiled());
|
||||
}
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
void testAllRules() {
|
||||
|
||||
DroolsExecutionService droolsExecutionService = new DroolsExecutionService(rulesClient, entityEnrichmentService, new DroolsSyntaxValidationFactory());
|
||||
var rulesFile = new ClassPathResource("drools/all_rules.drl");
|
||||
|
||||
String rulesString = new String(rulesFile.getInputStream().readAllBytes());
|
||||
|
||||
DroolsSyntaxValidation droolsSyntaxValidation = droolsExecutionService.testRules(rulesString);
|
||||
assertTrue(droolsSyntaxValidation.isCompiled());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
|
||||
@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global ManualChangesApplicationService manualChangesApplicationService
|
||||
global Dictionary dictionary
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
@ -77,7 +78,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
|
||||
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -85,7 +86,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
|
||||
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -95,7 +96,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
|
||||
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
|
||||
then
|
||||
$entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
|
||||
end
|
||||
@ -103,7 +104,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
|
||||
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -112,9 +113,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
|
||||
// Rule unit: CBI.2
|
||||
rule "CBI.2.0: Don't redact genitive CBI_author"
|
||||
when
|
||||
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied())
|
||||
$entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied())
|
||||
then
|
||||
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
|
||||
entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document)
|
||||
.ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found"));
|
||||
end
|
||||
|
||||
@ -299,7 +300,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
|
||||
rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -307,7 +308,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
rule "PII.0.1: Redact all PII (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -453,9 +454,10 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
|
||||
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
|
||||
when
|
||||
not FileAttribute(label == "Confidentiality", value == "confidential")
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
$dossierRedaction: TextEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
|
||||
update($dossierRedaction);
|
||||
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
@ -489,10 +491,10 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
|
||||
rule "MAN.0.0: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeResized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -503,11 +505,10 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
|
||||
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($entityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -516,11 +517,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageEntityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
|
||||
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($imageEntityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
update($imageEntityToBeRemoved.getParent());
|
||||
@ -532,29 +532,27 @@ rule "MAN.2.0: Apply force redaction"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToForce: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
$entityToForce.getManualOverwrite().addChange($force);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.3
|
||||
rule "MAN.3.0: Apply image recategorization"
|
||||
rule "MAN.3.0: Apply entity recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
|
||||
update($imageToBeRecategorized);
|
||||
update($imageToBeRecategorized.getParent());
|
||||
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
|
||||
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
|
||||
retract($recategorization);
|
||||
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
|
||||
retract($entityToBeRecategorized);
|
||||
end
|
||||
|
||||
|
||||
@ -564,8 +562,8 @@ rule "MAN.3.0: Apply image recategorization"
|
||||
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$larger: TextEntity($type: type, $entityType: entityType, active())
|
||||
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
|
||||
then
|
||||
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
|
||||
retract($contained);
|
||||
@ -576,10 +574,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
rule "X.1.0: merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
|
||||
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
|
||||
then
|
||||
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
$first.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
$second.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
retract($first);
|
||||
@ -592,8 +590,8 @@ rule "X.1.0: merge intersecting Entities of same type"
|
||||
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
salience 64
|
||||
when
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
|
||||
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
|
||||
then
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
|
||||
@ -605,8 +603,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
salience 64
|
||||
when
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
|
||||
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
|
||||
retract($recommendation);
|
||||
@ -617,8 +615,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
|
||||
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
|
||||
@ -630,8 +628,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
|
||||
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity(entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
|
||||
retract($recommendation);
|
||||
@ -642,8 +640,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
|
||||
salience 32
|
||||
when
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
|
||||
then
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");
|
||||
|
||||
@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global ManualChangesApplicationService manualChangesApplicationService
|
||||
global Dictionary dictionary
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
@ -90,7 +91,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
|
||||
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -98,7 +99,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
|
||||
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -108,7 +109,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
|
||||
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
|
||||
then
|
||||
$entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
|
||||
end
|
||||
@ -116,7 +117,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
|
||||
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -125,9 +126,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
|
||||
// Rule unit: CBI.2
|
||||
rule "CBI.2.0: Don't redact genitive CBI_author"
|
||||
when
|
||||
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied())
|
||||
$entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied())
|
||||
then
|
||||
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
|
||||
entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document)
|
||||
.ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found"));
|
||||
end
|
||||
|
||||
@ -474,7 +475,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert
|
||||
rule "CBI.13.0: Ignore CBI Address Recommendations"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION)
|
||||
$entity: TextEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION)
|
||||
then
|
||||
$entity.ignore("CBI.13.0", "Ignore CBI Address Recommendations");
|
||||
retract($entity)
|
||||
@ -484,7 +485,7 @@ rule "CBI.13.0: Ignore CBI Address Recommendations"
|
||||
// Rule unit: CBI.14
|
||||
rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\""
|
||||
when
|
||||
$sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
|
||||
$sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
|
||||
then
|
||||
$sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
@ -587,7 +588,7 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with
|
||||
rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
no-loop true
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "CBI_author",
|
||||
$entityToExpand: TextEntity(type == "CBI_author",
|
||||
value.matches("[^\\s]+"),
|
||||
textAfter.startsWith(" "),
|
||||
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
@ -595,7 +596,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
then
|
||||
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove("CBI.18.0", "Expand CBI_author entities with firstname initials");
|
||||
retract($entityToExpand);
|
||||
});
|
||||
@ -605,11 +606,11 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
// Rule unit: CBI.19
|
||||
rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
$entityToExpand: TextEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove("CBI.19.0", "Expand CBI_author entities with salutation prefix");
|
||||
retract($entityToExpand);
|
||||
});
|
||||
@ -650,7 +651,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
|
||||
rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -658,7 +659,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
rule "PII.0.1: Redact all PII (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -991,10 +992,10 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:"
|
||||
// Rule unit: PII.12
|
||||
rule "PII.12.0: Expand PII entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
$entityToExpand: TextEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()));
|
||||
.ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()));
|
||||
end
|
||||
|
||||
|
||||
@ -1059,7 +1060,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
|
||||
// Rule unit: ETC.4
|
||||
rule "ETC.4.0: Redact dossier dictionary entries"
|
||||
when
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
$dossierRedaction: TextEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -1069,7 +1070,7 @@ rule "ETC.4.0: Redact dossier dictionary entries"
|
||||
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
|
||||
when
|
||||
not FileAttribute(label == "Confidentiality", value == "confidential")
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
$dossierRedaction: TextEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
|
||||
update($dossierRedaction);
|
||||
@ -1161,10 +1162,10 @@ rule "AI.2.0: add all NER Entities of any type except CBI_author"
|
||||
rule "MAN.0.0: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeResized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -1173,10 +1174,10 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
rule "MAN.0.1: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeResized: Image(id == $id)
|
||||
then
|
||||
manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($imageToBeResized);
|
||||
update($imageToBeResized.getParent());
|
||||
@ -1187,11 +1188,10 @@ rule "MAN.0.1: Apply manual resize redaction"
|
||||
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
|
||||
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($entityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -1200,11 +1200,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageEntityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
|
||||
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($imageEntityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
update($imageEntityToBeRemoved.getParent());
|
||||
@ -1216,13 +1215,10 @@ rule "MAN.2.0: Apply force redaction"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToForce: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
$entityToForce.getManualOverwrite().addChange($force);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
@ -1231,29 +1227,60 @@ rule "MAN.2.1: Apply force redaction to images"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToForce: Image(id == $id)
|
||||
then
|
||||
$imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$imageToForce.setRemoved(false);
|
||||
$imageToForce.setIgnored(false);
|
||||
$imageToForce.getManualOverwrite().addChange($force);
|
||||
update($imageToForce);
|
||||
update($imageToForce.getParent());
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.3
|
||||
rule "MAN.3.0: Apply image recategorization"
|
||||
rule "MAN.3.0: Apply entity recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
|
||||
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
|
||||
retract($recategorization);
|
||||
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
|
||||
retract($entityToBeRecategorized);
|
||||
end
|
||||
|
||||
rule "MAN.3.1: Apply image recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
|
||||
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
|
||||
update($imageToBeRecategorized);
|
||||
update($imageToBeRecategorized.getParent());
|
||||
retract($recategorization);
|
||||
end
|
||||
|
||||
// Rule unit: MAN.4
|
||||
rule "MAN.4.0: Apply legal basis change"
|
||||
salience 128
|
||||
when
|
||||
$legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange);
|
||||
end
|
||||
|
||||
rule "MAN.4.1: Apply legal basis change"
|
||||
salience 128
|
||||
when
|
||||
$legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeChanged: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeChanged.getManualOverwrite().addChange($legalBasisChange);
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Entity merging rules ------------------------------------
|
||||
|
||||
@ -1261,8 +1288,8 @@ rule "MAN.3.0: Apply image recategorization"
|
||||
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$larger: TextEntity($type: type, $entityType: entityType, active())
|
||||
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
|
||||
then
|
||||
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
|
||||
retract($contained);
|
||||
@ -1273,10 +1300,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
rule "X.1.0: merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
|
||||
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
|
||||
then
|
||||
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
$first.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
$second.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
retract($first);
|
||||
@ -1289,8 +1316,8 @@ rule "X.1.0: merge intersecting Entities of same type"
|
||||
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
salience 64
|
||||
when
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
|
||||
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
|
||||
then
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
|
||||
@ -1302,8 +1329,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
salience 64
|
||||
when
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
|
||||
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
|
||||
retract($recommendation);
|
||||
@ -1314,8 +1341,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
|
||||
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
|
||||
@ -1327,8 +1354,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
|
||||
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity(entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
|
||||
retract($recommendation);
|
||||
@ -1339,8 +1366,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
|
||||
salience 32
|
||||
when
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
|
||||
then
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");
|
||||
|
||||
@ -34,22 +34,22 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global ManualChangesApplicationService manualRedactionApplicationService
|
||||
global Dictionary dictionary
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
@ -397,7 +397,7 @@ rule "DOC.8.1: Performing Laboratory (Name)"
|
||||
$section: Section(containsString("PERFORMING LABORATORY:"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("COUNTRY")
|
||||
.filter(nerEntity -> $section.getBoundary().contains(nerEntity.boundary()))
|
||||
.filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange()))
|
||||
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section))
|
||||
.forEach(entity -> {
|
||||
entity.apply("DOC.8.2", "Performing Laboratory found", "n-a");
|
||||
@ -1221,7 +1221,7 @@ rule "DOC.44.0: Results (Main Study)"
|
||||
FileAttribute(label == "OECD Number", value == "429")
|
||||
$section: Section(
|
||||
getHeadline().containsString("Results")
|
||||
&& getHeadline().getBoundary().length() < 20
|
||||
&& getHeadline().getGetTextRange().length() < 20
|
||||
&& !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table"))
|
||||
)
|
||||
then
|
||||
@ -1262,7 +1262,7 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
|
||||
$entityToBeResized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
@ -1276,7 +1276,7 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
|
||||
when
|
||||
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.removeFromGraph();
|
||||
retract($entityToBeRemoved);
|
||||
@ -1298,7 +1298,7 @@ rule "MAN.2.0: Apply force redaction"
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
$entityToForce: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
@ -1328,8 +1328,8 @@ rule "MAN.3.0: Apply image recategorization"
|
||||
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$larger: TextEntity($type: type, $entityType: entityType, getActive())
|
||||
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, getActive())
|
||||
then
|
||||
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
|
||||
retract($contained);
|
||||
|
||||
@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global ManualChangesApplicationService manualChangesApplicationService
|
||||
global Dictionary dictionary
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
@ -58,14 +59,16 @@ query "getFileAttributes"
|
||||
$fileAttribute: FileAttribute()
|
||||
end
|
||||
|
||||
//------------------------------------ Manual redaction rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
rule "MAN.0.0: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeResized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -74,10 +77,10 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
rule "MAN.0.1: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeResized: Image(id == $id)
|
||||
then
|
||||
manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($imageToBeResized);
|
||||
update($imageToBeResized.getParent());
|
||||
@ -88,11 +91,10 @@ rule "MAN.0.1: Apply manual resize redaction"
|
||||
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
|
||||
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($entityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -101,11 +103,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageEntityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
|
||||
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($imageEntityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
update($imageEntityToBeRemoved.getParent());
|
||||
@ -117,13 +118,10 @@ rule "MAN.2.0: Apply force redaction"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToForce: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
$entityToForce.getManualOverwrite().addChange($force);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
@ -132,29 +130,61 @@ rule "MAN.2.1: Apply force redaction to images"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToForce: Image(id == $id)
|
||||
then
|
||||
$imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$imageToForce.setRemoved(false);
|
||||
$imageToForce.setIgnored(false);
|
||||
$imageToForce.getManualOverwrite().addChange($force);
|
||||
update($imageToForce);
|
||||
update($imageToForce.getParent());
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.3
|
||||
rule "MAN.3.0: Apply image recategorization"
|
||||
rule "MAN.3.0: Apply entity recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
|
||||
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
|
||||
retract($recategorization);
|
||||
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
|
||||
retract($entityToBeRecategorized);
|
||||
end
|
||||
|
||||
rule "MAN.3.1: Apply image recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
|
||||
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
|
||||
update($imageToBeRecategorized);
|
||||
update($imageToBeRecategorized.getParent());
|
||||
retract($recategorization);
|
||||
end
|
||||
|
||||
// Rule unit: MAN.4
|
||||
rule "MAN.4.0: Apply legal basis change"
|
||||
salience 128
|
||||
when
|
||||
$legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange);
|
||||
end
|
||||
|
||||
rule "MAN.4.1: Apply legal basis change"
|
||||
salience 128
|
||||
when
|
||||
$legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeChanged: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeChanged.getManualOverwrite().addChange($legalBasisChange);
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Local dictionary search rules ------------------------------------
|
||||
|
||||
// Rule unit: LDS.0
|
||||
|
||||
@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global ManualChangesApplicationService manualChangesApplicationService
|
||||
global Dictionary dictionary
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
@ -315,7 +316,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert
|
||||
// Rule unit: CBI.14
|
||||
rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\""
|
||||
when
|
||||
$sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
|
||||
$sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
|
||||
then
|
||||
$sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
@ -418,7 +419,7 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with
|
||||
rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
no-loop true
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "CBI_author",
|
||||
$entityToExpand: TextEntity(type == "CBI_author",
|
||||
value.matches("[^\\s]+"),
|
||||
textAfter.startsWith(" "),
|
||||
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
@ -426,7 +427,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
then
|
||||
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove("CBI.18.0", "Expand CBI_author entities with firstname initials");
|
||||
retract($entityToExpand);
|
||||
});
|
||||
@ -436,11 +437,11 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
// Rule unit: CBI.19
|
||||
rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
$entityToExpand: TextEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove("CBI.19.0", "Expand CBI_author entities with salutation prefix");
|
||||
retract($entityToExpand);
|
||||
});
|
||||
@ -481,7 +482,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
|
||||
rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -489,7 +490,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
rule "PII.0.1: Redact all PII (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -726,10 +727,10 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:"
|
||||
// Rule unit: PII.12
|
||||
rule "PII.12.0: Expand PII entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
$entityToExpand: TextEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()));
|
||||
.ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()));
|
||||
end
|
||||
|
||||
|
||||
@ -784,7 +785,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
|
||||
// Rule unit: ETC.4
|
||||
rule "ETC.4.0: Redact dossier dictionary entries"
|
||||
when
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
$dossierRedaction: TextEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -794,7 +795,7 @@ rule "ETC.4.0: Redact dossier dictionary entries"
|
||||
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
|
||||
when
|
||||
not FileAttribute(label == "Confidentiality", value == "confidential")
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
$dossierRedaction: TextEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
|
||||
update($dossierRedaction);
|
||||
@ -874,10 +875,10 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
|
||||
rule "MAN.0.0: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeResized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -886,10 +887,10 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
rule "MAN.0.1: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeResized: Image(id == $id)
|
||||
then
|
||||
manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($imageToBeResized);
|
||||
update($imageToBeResized.getParent());
|
||||
@ -900,11 +901,10 @@ rule "MAN.0.1: Apply manual resize redaction"
|
||||
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
|
||||
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($entityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -913,11 +913,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageEntityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
|
||||
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($imageEntityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
update($imageEntityToBeRemoved.getParent());
|
||||
@ -929,13 +928,10 @@ rule "MAN.2.0: Apply force redaction"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToForce: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
$entityToForce.getManualOverwrite().addChange($force);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
@ -944,39 +940,70 @@ rule "MAN.2.1: Apply force redaction to images"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToForce: Image(id == $id)
|
||||
then
|
||||
$imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$imageToForce.setRemoved(false);
|
||||
$imageToForce.setIgnored(false);
|
||||
$imageToForce.getManualOverwrite().addChange($force);
|
||||
update($imageToForce);
|
||||
update($imageToForce.getParent());
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.3
|
||||
rule "MAN.3.0: Apply image recategorization"
|
||||
rule "MAN.3.0: Apply entity recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
|
||||
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
|
||||
retract($recategorization);
|
||||
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
|
||||
retract($entityToBeRecategorized);
|
||||
end
|
||||
|
||||
rule "MAN.3.1: Apply image recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
|
||||
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
|
||||
update($imageToBeRecategorized);
|
||||
update($imageToBeRecategorized.getParent());
|
||||
retract($recategorization);
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.4
|
||||
// MAN.4.0: attaches an approved manual legal-basis change to the image it refers to.
// NOTE(review): unlike the MAN.1.x / MAN.3.x rules, this rule neither retracts the
// change fact nor calls update() on the image - confirm that this is intentional.
rule "MAN.4.0: Apply legal basis change"
    salience 128
    when
        // Only APPROVED changes qualify; $id carries the annotation id into the next pattern.
        $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
        // Images are matched by exact id equality.
        $imageToBeChanged: Image($id == id)
    then
        // Record the change on the image's manual overwrite.
        $imageToBeChanged.getManualOverwrite().addChange($legalBasisChange);
end
|
||||
|
||||
// MAN.4.1: attaches an approved manual legal-basis change to every text entity
// whose annotation id matches the change.
rule "MAN.4.1: Apply legal basis change"
    salience 128
    when
        // Approved changes only; the annotation id is bound for the entity lookup below.
        $change: ManualLegalBasisChange(status == AnnotationStatus.APPROVED, $id: annotationId)
        // Text entities are selected through matchesAnnotationId rather than a plain id comparison.
        $target: TextEntity(matchesAnnotationId($id))
    then
        // Record the change on the entity's manual overwrite.
        $target.getManualOverwrite()
               .addChange($change);
end
|
||||
|
||||
|
||||
//------------------------------------ Entity merging rules ------------------------------------
|
||||
|
||||
// Rule unit: X.0
|
||||
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$larger: TextEntity($type: type, $entityType: entityType, active())
|
||||
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
|
||||
then
|
||||
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
|
||||
retract($contained);
|
||||
@ -987,10 +1014,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
rule "X.1.0: merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
|
||||
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
|
||||
then
|
||||
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
$first.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
$second.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
retract($first);
|
||||
@ -1003,8 +1030,8 @@ rule "X.1.0: merge intersecting Entities of same type"
|
||||
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
salience 64
|
||||
when
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
|
||||
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
|
||||
then
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
|
||||
@ -1016,8 +1043,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
salience 64
|
||||
when
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
|
||||
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
|
||||
retract($recommendation);
|
||||
@ -1028,8 +1055,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
|
||||
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
|
||||
@ -1041,8 +1068,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
|
||||
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity(entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
|
||||
retract($recommendation);
|
||||
@ -1053,8 +1080,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
|
||||
salience 32
|
||||
when
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
|
||||
then
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");
|
||||
|
||||
@ -34,22 +34,22 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global ManualChangesApplicationService manualChangesApplicationService
|
||||
global Dictionary dictionary
|
||||
// --------------------------------------- queries -------------------------------------------------------------------
|
||||
|
||||
@ -64,7 +64,7 @@ rule "add NER Entities of type CBI_author or CBI_address"
|
||||
when
|
||||
$nerEntity: EntityRecognitionEntity($type: type, (type == "CBI_author" || type == "CBI_address"))
|
||||
then
|
||||
entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document)
|
||||
entityCreationService.byBoundary(new TextRange($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(redactionEntity -> insert(redactionEntity));
|
||||
end
|
||||
|
||||
@ -73,7 +73,7 @@ rule "add NER Entities of type CBI_author or CBI_address"
|
||||
rule "Always redact CBI_author"
|
||||
|
||||
when
|
||||
$cbiAuthor: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY)
|
||||
$cbiAuthor: TextEntity(type == "CBI_author", entityType == EntityType.ENTITY)
|
||||
then
|
||||
$cbiAuthor.apply("CBI.0.0", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -83,7 +83,7 @@ rule "Always redact CBI_author"
|
||||
rule "Always redact PII"
|
||||
|
||||
when
|
||||
$cbiAuthor: RedactionEntity(type == "PII", entityType == EntityType.ENTITY)
|
||||
$cbiAuthor: TextEntity(type == "PII", entityType == EntityType.ENTITY)
|
||||
then
|
||||
$cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -96,8 +96,8 @@ rule "Always redact PII"
|
||||
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$larger: TextEntity($type: type, $entityType: entityType, active())
|
||||
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
|
||||
then
|
||||
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
|
||||
retract($contained);
|
||||
@ -108,10 +108,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
rule "X.1.0: merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
|
||||
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
|
||||
then
|
||||
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
$first.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
$second.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
retract($first);
|
||||
@ -124,8 +124,8 @@ rule "X.1.0: merge intersecting Entities of same type"
|
||||
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
salience 64
|
||||
when
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
|
||||
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
|
||||
then
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
|
||||
@ -137,8 +137,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
salience 64
|
||||
when
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
|
||||
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
|
||||
retract($recommendation);
|
||||
@ -149,8 +149,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
|
||||
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
|
||||
@ -162,8 +162,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
|
||||
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity(entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
|
||||
retract($recommendation);
|
||||
@ -174,8 +174,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
|
||||
salience 32
|
||||
when
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
|
||||
then
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");
|
||||
|
||||
@ -33,22 +33,23 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global ManualChangesApplicationService manualChangesApplicationService
|
||||
global NerEntitiesAdapter nerEntitiesAdapter
|
||||
global Dictionary dictionary
|
||||
|
||||
@ -77,7 +78,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
|
||||
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -85,7 +86,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
|
||||
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -95,7 +96,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
|
||||
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
|
||||
then
|
||||
$entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
|
||||
end
|
||||
@ -103,7 +104,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
|
||||
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
|
||||
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
|
||||
then
|
||||
$entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -112,9 +113,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
|
||||
// Rule unit: CBI.2
|
||||
rule "CBI.2.0: Don't redact genitive CBI_author"
|
||||
when
|
||||
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied())
|
||||
$entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied())
|
||||
then
|
||||
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
|
||||
entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document)
|
||||
.ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found"));
|
||||
end
|
||||
|
||||
@ -299,7 +300,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
|
||||
// PII.0.0: fires when no FileAttribute labelled "Vertebrate Study" has a value that lower-cases to "yes".
// Each "PII" entity with dictionaryEntry set gets apply(...) called, citing Article 39(e)(3).
// NOTE(review): the RedactionEntity/TextEntity patterns both bind $pii — presumably the old/new pair
// of a rename shown by the diff view; confirm against the real file.
rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -307,7 +308,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
|
||||
// PII.0.1: vertebrate-study counterpart of PII.0.0. Fires when a FileAttribute labelled
// "Vertebrate Study" has a value that lower-cases to "yes"; each "PII" entity with dictionaryEntry set
// gets apply(...) called, citing Article 39(e)(2) (PII.0.0 cites 39(e)(3)).
// NOTE(review): the RedactionEntity/TextEntity patterns both bind $pii — presumably the old/new pair
// of a rename shown by the diff view; confirm against the real file.
rule "PII.0.1: Redact all PII (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", dictionaryEntry)
|
||||
$pii: TextEntity(type == "PII", dictionaryEntry)
|
||||
then
|
||||
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
@ -453,7 +454,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
|
||||
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
|
||||
when
|
||||
not FileAttribute(label == "Confidentiality", value == "confidential")
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
$dossierRedaction: TextEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
|
||||
update($dossierRedaction);
|
||||
@ -492,25 +493,36 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
|
||||
// MAN.0.0: pairs a ManualResizeRedaction with the text entity whose annotation id matches, hands both
// to resizeEntityAndReinsert(...), then retracts the resize fact so it is not matched again and
// notifies the engine that the entity and every node it intersects have changed.
// Salience 128 gives this rule priority over rules with lower salience.
// NOTE(review): the `when` and `then` sections each show two variants (RedactionEntity /
// manualRedactionApplicationService without a status check vs. TextEntity /
// manualChangesApplicationService restricted to APPROVED). These look like old/new pairs from the
// diff view — confirm that only the second variant of each is in the real file.
rule "MAN.0.0: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeResized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
// MAN.0.1: image counterpart of MAN.0.0. Pairs an APPROVED ManualResizeRedaction with the Image whose
// id equals the redaction's annotationId, passes both to resizeImage(...), retracts the resize fact,
// and notifies the engine that the image and its parent have changed.
// NOTE(review): the rule title text is identical to MAN.0.0 apart from the identifier prefix;
// a title mentioning images would be easier to tell apart.
rule "MAN.0.1: Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeResized: Image(id == $id)
|
||||
then
|
||||
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($imageToBeResized);
|
||||
update($imageToBeResized.getParent());
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.1
|
||||
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
|
||||
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($entityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
|
||||
@ -519,11 +531,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageEntityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
|
||||
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
|
||||
update($imageEntityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
update($imageEntityToBeRemoved.getParent());
|
||||
@ -535,31 +546,72 @@ rule "MAN.2.0: Apply force redaction"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToForce: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
$entityToForce.getManualOverwrite().addChange($force);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.3
|
||||
rule "MAN.3.0: Apply image recategorization"
|
||||
// MAN.2.1: pairs an APPROVED ManualForceRedaction with the Image whose id equals its annotationId,
// records the force redaction on the image's manual overwrite, and notifies the engine that the image
// and its parent have changed. The force fact is not retracted; `no-loop true` keeps the rule from
// re-activating through its own update(...) calls.
// NOTE(review): the ManualImageRecategorization pattern below also binds $id and is unused in the
// consequence; it appears to be a removed line of the former "MAN.3.0: Apply image recategorization"
// rule interleaved by the diff view — confirm it is not part of this rule in the real file.
rule "MAN.2.1: Apply force redaction to images"
|
||||
no-loop true
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToForce: Image(id == $id)
|
||||
then
|
||||
$imageToForce.getManualOverwrite().addChange($force);
|
||||
update($imageToForce);
|
||||
update($imageToForce.getParent());
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: MAN.3
|
||||
// MAN.3.0: pairs an APPROVED recategorization fact with the text entity whose annotation id matches.
// The intersecting nodes are flagged as updated first, then the entity and the recategorization are
// handed to manualChangesApplicationService.recategorize(...), and finally both the recategorization
// fact and the original entity are retracted from working memory.
// NOTE(review): the matched fact type is ManualImageRecategorization even though the target is a
// TextEntity — presumably the type is shared between images and text entities; confirm.
rule "MAN.3.0: Apply entity recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
|
||||
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
|
||||
retract($recategorization);
|
||||
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
|
||||
retract($entityToBeRecategorized);
|
||||
end
|
||||
|
||||
// MAN.3.1: pairs an APPROVED ManualImageRecategorization with the Image whose id equals its
// annotationId, applies the recategorization, notifies the engine that the image and its parent have
// changed, and retracts the recategorization fact so it is not matched again.
// NOTE(review): `$imageType` used by the setImageType(...) line is not bound anywhere in this rule's
// `when` section as shown. That line appears to be the pre-change statement that the
// manualChangesApplicationService.recategorize(...) call replaces in the diff — confirm it is absent
// from the real file, otherwise the rule would not compile.
rule "MAN.3.1: Apply image recategorization"
|
||||
salience 128
|
||||
when
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
|
||||
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
|
||||
update($imageToBeRecategorized);
|
||||
update($imageToBeRecategorized.getParent());
|
||||
retract($recategorization);
|
||||
end
|
||||
|
||||
// Rule unit: MAN.4
|
||||
// MAN.4.0: pairs an APPROVED ManualLegalBasisChange with the Image whose id equals its annotationId
// and records the change on the image's manual overwrite.
// NOTE(review): the image binding is named `$imageToBeRecategorized` although this rule changes the
// legal basis, not the category — likely carried over from MAN.3.1; consider renaming.
// NOTE(review): unlike the other MAN image rules shown here, neither update(...) nor retract(...) is
// called, so the engine is not told the image changed and the change fact stays in working memory —
// confirm this is intended.
rule "MAN.4.0: Apply legal basis change"
|
||||
salience 128
|
||||
when
|
||||
$legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange);
|
||||
end
|
||||
|
||||
// MAN.4.1: text-entity counterpart of MAN.4.0. Pairs an APPROVED ManualLegalBasisChange with the
// TextEntity whose annotation id matches and records the change on the entity's manual overwrite.
// NOTE(review): the rule title text is identical to MAN.4.0 apart from the identifier prefix, and no
// update(...)/retract(...) is called here either — confirm both are intended.
rule "MAN.4.1: Apply legal basis change"
|
||||
salience 128
|
||||
when
|
||||
$legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
|
||||
$entityToBeChanged: TextEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeChanged.getManualOverwrite().addChange($legalBasisChange);
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Entity merging rules ------------------------------------
|
||||
|
||||
@ -567,8 +619,8 @@ rule "MAN.3.0: Apply image recategorization"
|
||||
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$larger: TextEntity($type: type, $entityType: entityType, active())
|
||||
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
|
||||
then
|
||||
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
|
||||
retract($contained);
|
||||
@ -579,10 +631,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
rule "X.1.0: merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
|
||||
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
|
||||
then
|
||||
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
|
||||
$first.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
$second.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
retract($first);
|
||||
@ -595,8 +647,8 @@ rule "X.1.0: merge intersecting Entities of same type"
|
||||
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
salience 64
|
||||
when
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
|
||||
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
|
||||
then
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
|
||||
@ -608,8 +660,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
salience 64
|
||||
when
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
|
||||
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
|
||||
retract($recommendation);
|
||||
@ -620,8 +672,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
|
||||
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
|
||||
@ -633,8 +685,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
|
||||
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$entity: TextEntity(entityType == EntityType.ENTITY, active())
|
||||
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
|
||||
then
|
||||
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
|
||||
retract($recommendation);
|
||||
@ -645,8 +697,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
|
||||
salience 32
|
||||
when
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
|
||||
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
|
||||
then
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user