Merge branch 'RED-7317' into 'master'

RED-7317: fix behavior of recategorize

Closes RED-7317

See merge request redactmanager/redaction-service!113
This commit is contained in:
Kilian Schüttler 2023-08-31 15:23:43 +02:00
commit 141c64cde3
65 changed files with 2411 additions and 1549 deletions

View File

@ -16,6 +16,7 @@ val layoutParserVersion = "0.25.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "8.43.0.Final"
val pdfBoxVersion = "3.0.0-alpha2"
val persistenceServiceVersion = "2.155.0"
configurations {
all {
@ -26,7 +27,7 @@ configurations {
dependencies {
implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") }
implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.119.0") { exclude(group = "org.springframework.boot") }
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") }
implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}")
implementation("com.iqser.red.commons:spring-commons:2.7.0")

View File

@ -11,17 +11,17 @@ import java.util.stream.Collector;
import com.google.common.base.Functions;
public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Boundary>, List<Boundary>> {
public class ConsecutiveBoundaryCollector implements Collector<TextRange, List<TextRange>, List<TextRange>> {
@Override
public Supplier<List<Boundary>> supplier() {
public Supplier<List<TextRange>> supplier() {
return LinkedList::new;
}
@Override
public BiConsumer<List<Boundary>, Boundary> accumulator() {
public BiConsumer<List<TextRange>, TextRange> accumulator() {
return (existingList, boundary) -> {
if (existingList.isEmpty()) {
@ -29,14 +29,14 @@ public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Bo
return;
}
Boundary prevBoundary = existingList.get(existingList.size() - 1);
if (prevBoundary.end() > boundary.start()) {
throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", prevBoundary, boundary));
TextRange prevTextRange = existingList.get(existingList.size() - 1);
if (prevTextRange.end() > boundary.start()) {
throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", prevTextRange, boundary));
}
if (prevBoundary.end() == boundary.start()) {
if (prevTextRange.end() == boundary.start()) {
existingList.remove(existingList.size() - 1);
existingList.add(Boundary.merge(List.of(prevBoundary, boundary)));
existingList.add(TextRange.merge(List.of(prevTextRange, boundary)));
} else {
existingList.add(boundary);
}
@ -45,7 +45,7 @@ public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Bo
@Override
public BinaryOperator<List<Boundary>> combiner() {
public BinaryOperator<List<TextRange>> combiner() {
return (list1, list2) -> {
list1.addAll(list2);
@ -55,7 +55,7 @@ public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Bo
@Override
public Function<List<Boundary>, List<Boundary>> finisher() {
public Function<List<TextRange>, List<TextRange>> finisher() {
return Functions.identity();
}

View File

@ -13,13 +13,13 @@ import lombok.Setter;
@Setter
@EqualsAndHashCode
public class Boundary implements Comparable<Boundary> {
public class TextRange implements Comparable<TextRange> {
private int start;
private int end;
public Boundary(int start, int end) {
public TextRange(int start, int end) {
if (start > end) {
throw new IllegalArgumentException(format("start: %d > end: %d", start, end));
@ -47,15 +47,15 @@ public class Boundary implements Comparable<Boundary> {
}
public boolean contains(Boundary boundary) {
public boolean contains(TextRange textRange) {
return start <= boundary.start() && boundary.end() <= end;
return start <= textRange.start() && textRange.end() <= end;
}
public boolean containedBy(Boundary boundary) {
public boolean containedBy(TextRange textRange) {
return boundary.contains(this);
return textRange.contains(this);
}
@ -83,18 +83,18 @@ public class Boundary implements Comparable<Boundary> {
}
public boolean intersects(Boundary boundary) {
public boolean intersects(TextRange textRange) {
return boundary.start() < this.end && this.start < boundary.end();
return textRange.start() < this.end && this.start < textRange.end();
}
public List<Boundary> split(List<Integer> splitIndices) {
public List<TextRange> split(List<Integer> splitIndices) {
if (splitIndices.stream().anyMatch(idx -> !this.contains(idx))) {
throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s", splitIndices.stream().filter(idx -> !this.contains(idx)).toList(), this));
}
List<Boundary> splitBoundaries = new LinkedList<>();
List<TextRange> splitBoundaries = new LinkedList<>();
int previousIndex = start;
for (int splitIndex : splitIndices) {
@ -102,19 +102,19 @@ public class Boundary implements Comparable<Boundary> {
if (splitIndex == previousIndex) {
continue;
}
splitBoundaries.add(new Boundary(previousIndex, splitIndex));
splitBoundaries.add(new TextRange(previousIndex, splitIndex));
previousIndex = splitIndex;
}
splitBoundaries.add(new Boundary(previousIndex, end));
splitBoundaries.add(new TextRange(previousIndex, end));
return splitBoundaries;
}
public static Boundary merge(Collection<Boundary> boundaries) {
public static TextRange merge(Collection<TextRange> boundaries) {
int minStart = boundaries.stream().mapToInt(Boundary::start).min().orElseThrow(IllegalArgumentException::new);
int maxEnd = boundaries.stream().mapToInt(Boundary::end).max().orElseThrow(IllegalArgumentException::new);
return new Boundary(minStart, maxEnd);
int minStart = boundaries.stream().mapToInt(TextRange::start).min().orElseThrow(IllegalArgumentException::new);
int maxEnd = boundaries.stream().mapToInt(TextRange::end).max().orElseThrow(IllegalArgumentException::new);
return new TextRange(minStart, maxEnd);
}
@ -126,12 +126,12 @@ public class Boundary implements Comparable<Boundary> {
@Override
public int compareTo(Boundary boundary) {
public int compareTo(TextRange textRange) {
if (end < boundary.end() && start < boundary.start()) {
if (end < textRange.end() && start < textRange.start()) {
return -1;
}
if (start > boundary.start() && end > boundary.end()) {
if (start > textRange.start() && end > textRange.end()) {
return 1;
}
@ -145,7 +145,7 @@ public class Boundary implements Comparable<Boundary> {
* @param textBlock TextBlock to check whitespaces against
* @return trimmed boundary
*/
public Boundary trim(TextBlock textBlock) {
public TextRange trim(TextBlock textBlock) {
if (this.length() == 0) {
return this;
@ -160,7 +160,7 @@ public class Boundary implements Comparable<Boundary> {
trimmedEnd--;
}
return new Boundary(trimmedStart, Math.max(trimmedEnd, trimmedStart));
return new TextRange(trimmedStart, Math.max(trimmedEnd, trimmedStart));
}
}

View File

@ -7,38 +7,54 @@ import java.util.Set;
import lombok.NonNull;
public interface MatchedRuleHolder {
public interface Entity {
PriorityQueue<MatchedRule> getMatchedRuleList();
boolean isIgnored();
ManualChangeOverwrite getManualOverwrite();
boolean isRemoved();
// Don't use default accessor pattern (e.g. isIgnored()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
default boolean ignored() {
void setIgnored(boolean ignored);
void setRemoved(boolean ignored);
default boolean isApplied() {
return getMatchedRule().isApplied();
return getManualOverwrite().getIgnored().orElse(getMatchedRule().isIgnored());
}
default Set<RedactionEntity> getReferences() {
default boolean removed() {
return getManualOverwrite().getRemoved().orElse(getMatchedRule().isRemoved());
}
default boolean resized() {
return getManualOverwrite().getResized().orElse(false);
}
default boolean applied() {
return getManualOverwrite().getApplied().orElse(getMatchedRule().isApplied());
}
default boolean hasManualChanges() {
return !getManualOverwrite().getManualChangeLog().isEmpty();
}
default Set<TextEntity> references() {
return getMatchedRule().getReferences();
}
default boolean isActive() {
default boolean active() {
return !(isRemoved() || isIgnored());
return !(removed() || ignored());
}
@ -82,15 +98,13 @@ public interface MatchedRuleHolder {
default void remove(String ruleIdentifier, String reason) {
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build());
setRemoved(true);
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).removed(true).build());
}
default void ignore(String ruleIdentifier, String reason) {
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build());
setIgnored(true);
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).ignored(true).build());
}
@ -121,7 +135,7 @@ public interface MatchedRuleHolder {
}
default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection<RedactionEntity> references) {
default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection<TextEntity> references) {
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
@ -136,7 +150,7 @@ public interface MatchedRuleHolder {
}
default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection<RedactionEntity> references) {
default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection<TextEntity> references) {
getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).references(new HashSet<>(references)).build());
}
@ -150,6 +164,9 @@ public interface MatchedRuleHolder {
default void addMatchedRules(Collection<MatchedRule> matchedRules) {
if (getMatchedRuleList().equals(matchedRules)) {
return;
}
getMatchedRuleList().addAll(matchedRules);
}
@ -168,4 +185,22 @@ public interface MatchedRuleHolder {
return getMatchedRuleList().peek();
}
default String buildReasonWithManualChangeDescriptions() {
if (getManualOverwrite().getDescriptions().isEmpty()) {
return getMatchedRule().getReason();
}
if (getMatchedRule().getReason().isEmpty()) {
return String.join(", ", getManualOverwrite().getDescriptions());
}
return getMatchedRule().getReason() + ", " + String.join(", ", getManualOverwrite().getDescriptions());
}
default String legalBasis() {
return getManualOverwrite().getLegalBasis().orElse(getMatchedRule().getLegalBasis());
}
}

View File

@ -0,0 +1,200 @@
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class ManualChangeOverwrite {
private static final Map<Class<? extends BaseAnnotation>, String> MANUAL_CHANGE_DESCRIPTIONS = Map.of(//
ManualRedactionEntry.class, "created by manual change", //
ManualLegalBasisChange.class, "legal basis was manually changed", //
ManualResizeRedaction.class, "resized by manual override", //
ManualForceRedaction.class, "forced by manual override", //
IdRemoval.class, "removed by manual override", //
ManualImageRecategorization.class, "recategorized by manual override");
List<BaseAnnotation> manualChanges = new LinkedList<>();
boolean changed;
List<String> descriptions;
String type;
String legalBasis;
String section;
String value;
Boolean applied;
Boolean removed;
Boolean ignored;
Boolean resized;
Boolean recategorized;
public void calculateCurrentOverride() {
if (!changed) {
return;
}
List<BaseAnnotation> sortedManualChanges = getManualChangeLog();
updateFields(sortedManualChanges);
}
public List<BaseAnnotation> getManualChangeLog() {
if (!changed) {
return manualChanges;
}
manualChanges.sort(Comparator.comparing(BaseAnnotation::getRequestDate));
updateFields(manualChanges);
// make list unmodifiable.
return manualChanges.stream().toList();
}
private void updateFields(List<BaseAnnotation> sortedManualChanges) {
descriptions = new LinkedList<>();
for (BaseAnnotation manualChange : sortedManualChanges) {
// ManualRedactionEntries are created prior to rule execution in analysis service.
if (manualChange instanceof IdRemoval) {
applied = false;
ignored = true;
}
if (manualChange instanceof ManualForceRedaction manualForceRedaction) {
removed = false;
ignored = false;
applied = true;
legalBasis = manualForceRedaction.getLegalBasis();
}
if (manualChange instanceof ManualLegalBasisChange manualLegalBasisChange) {
section = manualLegalBasisChange.getSection();
legalBasis = manualLegalBasisChange.getLegalBasis();
value = manualLegalBasisChange.getValue();
}
if (manualChange instanceof ManualResizeRedaction) {
// resizing logic happens in ManualChangesApplicationService.
resized = true;
}
if (manualChange instanceof ManualImageRecategorization recategorization) {
// recategorization logic happens in ManualChangesApplicationService.
recategorized = true;
// this is only relevant for ManualEntities. Image and TextEntity is recategorized in the ManualChangesApplicationService.
type = recategorization.getType();
}
descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass()));
}
changed = false;
}
public void addChange(BaseAnnotation manualChange) {
changed = true;
manualChanges.add(manualChange);
}
public void addChanges(List<BaseAnnotation> manualChangeLog) {
changed = true;
manualChanges.addAll(manualChangeLog);
}
public Optional<String> getLegalBasis() {
calculateCurrentOverride();
return legalBasis == null ? Optional.empty() : Optional.of(legalBasis);
}
public Optional<String> getType() {
calculateCurrentOverride();
return type == null ? Optional.empty() : Optional.of(type);
}
public Optional<String> getSection() {
calculateCurrentOverride();
return section == null ? Optional.empty() : Optional.of(section);
}
public Optional<String> getValue() {
calculateCurrentOverride();
return value == null ? Optional.empty() : Optional.of(value);
}
public Optional<Boolean> getApplied() {
calculateCurrentOverride();
return applied == null ? Optional.empty() : Optional.of(applied);
}
public Optional<Boolean> getRemoved() {
calculateCurrentOverride();
return removed == null ? Optional.empty() : Optional.of(removed);
}
public Optional<Boolean> getIgnored() {
calculateCurrentOverride();
return ignored == null ? Optional.empty() : Optional.of(ignored);
}
public Optional<Boolean> getResized() {
calculateCurrentOverride();
return resized == null ? Optional.empty() : Optional.of(resized);
}
public Optional<Boolean> getRecategorized() {
calculateCurrentOverride();
return recategorized == null ? Optional.empty() : Optional.of(recategorized);
}
public List<String> getDescriptions() {
calculateCurrentOverride();
return descriptions == null ? Collections.emptyList() : descriptions;
}
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Set;
@ -18,6 +19,10 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public final class MatchedRule implements Comparable<MatchedRule> {
public static final String FINAL_TYPE = "FINAL";
public static final String ELIMINATION_RULE_TYPE = "X";
private static final List<String> RULE_TYPE_PRIORITIES = List.of(FINAL_TYPE, ELIMINATION_RULE_TYPE);
@Builder.Default
RuleIdentifier ruleIdentifier = RuleIdentifier.empty();
@Builder.Default
@ -26,8 +31,11 @@ public final class MatchedRule implements Comparable<MatchedRule> {
String legalBasis = "";
boolean applied;
boolean writeValueWithLineBreaks;
boolean removed;
boolean ignored;
boolean resized;
@Builder.Default
Set<RedactionEntity> references = Collections.emptySet();
Set<TextEntity> references = Collections.emptySet();
public static MatchedRule empty() {
@ -39,32 +47,40 @@ public final class MatchedRule implements Comparable<MatchedRule> {
@Override
public int compareTo(MatchedRule matchedRule) {
// Only the highest ranked rule is actually applied, this method defines the highest order.
// First, it compares the Rule Type, RULE_TYPE_PRIORITIES defines the order of types.
// Types not in the list have the lowest priority.
// The ones in the list are technical exceptions and should override any other Rule.
// Aside from them Entities should never match from more than one type!
// E.g. a CBI_author entity should **always** only match CBI.*.* rules.
// Otherwise, something went wrong with the rules. :)
RuleIdentifier otherRuleIdentifier = matchedRule.getRuleIdentifier();
if (!Objects.equals(ruleIdentifier.type(), otherRuleIdentifier.type())) {
if (Objects.equals(otherRuleIdentifier.type(), "MAN")) {
return 1;
}
if (Objects.equals(ruleIdentifier.type(), "MAN")) {
return -1;
}
if (Objects.equals(otherRuleIdentifier.type(), "X")) {
return 1;
}
if (Objects.equals(ruleIdentifier.type(), "X")) {
return -1;
}
boolean thisInList = RULE_TYPE_PRIORITIES.contains(this.getRuleIdentifier().type());
boolean otherInList = RULE_TYPE_PRIORITIES.contains(otherRuleIdentifier.type());
// Compare the types
if (thisInList && !otherInList) {
return -1;
} else if (!thisInList && otherInList) {
return 1;
} else if (thisInList && otherInList) {
int thisIndex = RULE_TYPE_PRIORITIES.indexOf(this.getRuleIdentifier().type());
int otherIndex = RULE_TYPE_PRIORITIES.indexOf(otherRuleIdentifier.type());
return Integer.compare(thisIndex, otherIndex);
}
// Then compare the unit
if (!Objects.equals(otherRuleIdentifier.unit(), getRuleIdentifier().unit())) {
return otherRuleIdentifier.unit() - ruleIdentifier.unit();
return Integer.compare(otherRuleIdentifier.unit(), ruleIdentifier.unit());
}
return otherRuleIdentifier.id() - ruleIdentifier.id();
// Then compare the id inside the unit
return Integer.compare(otherRuleIdentifier.id(), ruleIdentifier.id());
}
@Override
public String toString() {
return "MatchedRule[" + "ruleIdentifier=" + ruleIdentifier + ", " + "reason=" + reason + ", " + "legalBasis=" + legalBasis + ", " + "applied=" + applied + ", " + "writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", " + "references=" + references + ']';
return "MatchedRule[ruleIdentifier=" + ruleIdentifier + ", reason=" + reason + ", legalBasis=" + legalBasis + ", applied=" + applied + ", writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", references=" + references + ']';
}
}

View File

@ -13,12 +13,13 @@ import lombok.experimental.FieldDefaults;
@Data
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class RedactionPosition {
public class PositionOnPage {
// Each entry in this list corresponds to an entry in the redaction log, this means:
// A single entity might be represented by multiple redaction log entries
// This is due to the RedactionLog only being able to handle a single page per entry.
final String id;
Page page;
// Each entry in this list corresponds to an entry in the redaction log, this means:
// An entity might be represented by multiple redaction log entries
List<Rectangle2D> rectanglePerLine;
}

View File

@ -11,9 +11,9 @@ import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import lombok.AccessLevel;
@ -28,29 +28,28 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class RedactionEntity implements MatchedRuleHolder {
public class TextEntity implements Entity {
// initial values
// primary key
@EqualsAndHashCode.Include
final Boundary boundary;
final TextRange textRange;
@EqualsAndHashCode.Include
final String type;
@EqualsAndHashCode.Include
final EntityType entityType;
// primary key end
// empty defaults
boolean removed;
boolean ignored;
@Builder.Default
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
@Builder.Default
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
boolean resized;
boolean skipRemoveEntitiesContainedInLarger;
boolean dictionaryEntry;
boolean dossierDictionaryEntry;
@Builder.Default
Set<Engine> engines = new HashSet<>();
@Builder.Default
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
// inferred on graph insertion
String value;
@ -58,15 +57,15 @@ public class RedactionEntity implements MatchedRuleHolder {
String textAfter;
@Builder.Default
Set<Page> pages = new HashSet<>();
List<RedactionPosition> redactionPositionsPerPage;
List<PositionOnPage> positionsOnPagePerPage;
@Builder.Default
List<SemanticNode> intersectingNodes = new LinkedList<>();
SemanticNode deepestFullyContainingNode;
public static RedactionEntity initialEntityNode(Boundary boundary, String type, EntityType entityType) {
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType) {
return RedactionEntity.builder().type(type).entityType(entityType).boundary(boundary).build();
return TextEntity.builder().type(type).entityType(entityType).textRange(textRange).build();
}
@ -102,7 +101,7 @@ public class RedactionEntity implements MatchedRuleHolder {
public String getValueWithLineBreaks() {
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getBoundary());
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
}
@ -113,14 +112,14 @@ public class RedactionEntity implements MatchedRuleHolder {
intersectingNodes = new LinkedList<>();
deepestFullyContainingNode = null;
pages = new HashSet<>();
removed = true;
remove("FINAL.0.0", "removed completely");
}
public List<RedactionPosition> getRedactionPositionsPerPage() {
public List<PositionOnPage> getPositionsOnPagePerPage() {
if (redactionPositionsPerPage == null || redactionPositionsPerPage.isEmpty()) {
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = deepestFullyContainingNode.getTextBlock().getPositionsPerPage(boundary);
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = deepestFullyContainingNode.getTextBlock().getPositionsPerPage(textRange);
Page firstPage = rectanglesPerLinePerPage.keySet()
.stream()
@ -128,37 +127,37 @@ public class RedactionEntity implements MatchedRuleHolder {
.orElseThrow(() -> new RuntimeException("No Positions found on any page!"));
String id = IdBuilder.buildId(pages, rectanglesPerLinePerPage.values().stream().flatMap(Collection::stream).toList(), type, entityType.name());
redactionPositionsPerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildRedactionPosition(firstPage, id, entry)).toList();
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildPositionOnPage(firstPage, id, entry)).toList();
}
return redactionPositionsPerPage;
return positionsOnPagePerPage;
}
private static RedactionPosition buildRedactionPosition(Page firstPage, String id, Map.Entry<Page, List<Rectangle2D>> entry) {
private static PositionOnPage buildPositionOnPage(Page firstPage, String id, Map.Entry<Page, List<Rectangle2D>> entry) {
if (entry.getKey().equals(firstPage)) {
return new RedactionPosition(id, entry.getKey(), entry.getValue());
return new PositionOnPage(id, entry.getKey(), entry.getValue());
} else {
return new RedactionPosition(id + "-" + entry.getKey().getNumber(), entry.getKey(), entry.getValue());
return new PositionOnPage(id + "-" + entry.getKey().getNumber(), entry.getKey(), entry.getValue());
}
}
public boolean containedBy(RedactionEntity redactionEntity) {
public boolean containedBy(TextEntity textEntity) {
return this.boundary.containedBy(redactionEntity.getBoundary());
return this.textRange.containedBy(textEntity.getTextRange());
}
public boolean contains(RedactionEntity redactionEntity) {
public boolean contains(TextEntity textEntity) {
return this.boundary.contains(redactionEntity.getBoundary());
return this.textRange.contains(textEntity.getTextRange());
}
public boolean intersects(RedactionEntity redactionEntity) {
public boolean intersects(TextEntity textEntity) {
return this.boundary.intersects(redactionEntity.getBoundary());
return this.textRange.intersects(textEntity.getTextRange());
}
@ -176,7 +175,7 @@ public class RedactionEntity implements MatchedRuleHolder {
public boolean matchesAnnotationId(String manualRedactionId) {
return getRedactionPositionsPerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
return getPositionsOnPagePerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
}
@ -187,7 +186,7 @@ public class RedactionEntity implements MatchedRuleHolder {
sb.append("Entity[\"");
sb.append(value);
sb.append("\", ");
sb.append(boundary);
sb.append(textRange);
sb.append(", pages[");
pages.forEach(page -> {
sb.append(page.getNumber());

View File

@ -11,7 +11,7 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
@ -34,7 +34,7 @@ public class Document implements GenericSemanticNode {
Integer numberOfPages;
TextBlock textBlock;
@Builder.Default
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Builder.Default
static final SectionIdentifier sectionIdentifier = SectionIdentifier.document();

View File

@ -5,7 +5,7 @@ import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.AccessLevel;
@ -34,7 +34,7 @@ public class Footer implements GenericSemanticNode {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Override

View File

@ -4,7 +4,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
@ -34,7 +34,7 @@ public class Header implements GenericSemanticNode {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Override

View File

@ -5,7 +5,7 @@ import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
@ -33,7 +33,7 @@ public class Headline implements GenericSemanticNode {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Override

View File

@ -9,10 +9,11 @@ import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRuleHolder;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
@ -29,7 +30,7 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor
@NoArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Image implements GenericSemanticNode, MatchedRuleHolder {
public class Image implements GenericSemanticNode, Entity {
List<Integer> treeId;
String id;
@ -38,12 +39,12 @@ public class Image implements GenericSemanticNode, MatchedRuleHolder {
boolean transparent;
Rectangle2D position;
boolean removed;
boolean ignored;
@Builder.Default
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
@Builder.Default
ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
@EqualsAndHashCode.Exclude
Page page;
@ -52,7 +53,7 @@ public class Image implements GenericSemanticNode, MatchedRuleHolder {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Override

View File

@ -1,11 +1,10 @@
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
@ -40,7 +39,7 @@ public class Page {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Builder.Default
@EqualsAndHashCode.Exclude

View File

@ -5,7 +5,7 @@ import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.AccessLevel;
@ -29,7 +29,7 @@ public class Paragraph implements GenericSemanticNode {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Override

View File

@ -4,7 +4,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
@ -32,7 +32,7 @@ public class Section implements GenericSemanticNode {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Override

View File

@ -13,9 +13,9 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
@ -43,11 +43,11 @@ public interface SemanticNode {
/**
* Any Node maintains its own Set of Entities.
* This Set contains all Entities whose boundary intersects the boundary of this node.
* This Set contains all Entities whose TextRange intersects the TextRange of this node.
*
* @return Set of all Entities associated with this Node
*/
Set<RedactionEntity> getEntities();
Set<TextEntity> getEntities();
/**
@ -72,16 +72,16 @@ public interface SemanticNode {
/**
* Each AtomicTextBlock is assigned a page, so to get the pages for this boundary, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
* Each AtomicTextBlock is assigned a page, so to get the pages for this TextRange, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
*
* @return Set of PageNodes this node appears on.
*/
default Set<Page> getPages(Boundary boundary) {
default Set<Page> getPages(TextRange textRange) {
if (!getBoundary().contains(boundary)) {
throw new IllegalArgumentException(format("%s which was used to query for pages is not contained in the %s of this node!", boundary, getBoundary()));
if (!getTextRange().contains(textRange)) {
throw new IllegalArgumentException(format("%s which was used to query for pages is not contained in the %s of this node!", textRange, getTextRange()));
}
return getTextBlock().getPages(boundary);
return getTextBlock().getPages(textRange);
}
@ -215,7 +215,7 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfType(String type) {
return getEntities().stream().filter(RedactionEntity::isActive).anyMatch(redactionEntity -> redactionEntity.getType().equals(type));
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> redactionEntity.getType().equals(type));
}
@ -228,7 +228,7 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAnyType(String... types) {
return getEntities().stream().filter(RedactionEntity::isActive).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.getType().equals(type)));
return getEntities().stream().filter(TextEntity::active).anyMatch(redactionEntity -> Arrays.stream(types).anyMatch(type -> redactionEntity.getType().equals(type)));
}
@ -242,8 +242,8 @@ public interface SemanticNode {
default boolean hasEntitiesOfAllTypes(String... types) {
return getEntities().stream()
.filter(RedactionEntity::isActive)
.map(RedactionEntity::getType)
.filter(TextEntity::active)
.map(TextEntity::getType)
.collect(Collectors.toUnmodifiableSet())
.containsAll(Arrays.stream(types).toList());
}
@ -256,9 +256,9 @@ public interface SemanticNode {
* @param type string representing the type of entities to return
* @return List of all active entities of the provided type
*/
default List<RedactionEntity> getEntitiesOfType(String type) {
default List<TextEntity> getEntitiesOfType(String type) {
return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.getType().equals(type)).toList();
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.getType().equals(type)).toList();
}
@ -269,9 +269,9 @@ public interface SemanticNode {
* @param types A list of strings representing the types of entities to return
* @return List of all active entities of any provided type
*/
default List<RedactionEntity> getEntitiesOfType(List<String> types) {
default List<TextEntity> getEntitiesOfType(List<String> types) {
return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList();
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(types)).toList();
}
@ -282,9 +282,9 @@ public interface SemanticNode {
* @param types A list of strings representing the types of entities to return
* @return List of all active entities that match any of the provided types
*/
default List<RedactionEntity> getEntitiesOfType(String... types) {
default List<TextEntity> getEntitiesOfType(String... types) {
return getEntities().stream().filter(RedactionEntity::isActive).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList();
return getEntities().stream().filter(TextEntity::active).filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types).toList())).toList();
}
@ -440,22 +440,22 @@ public interface SemanticNode {
/**
* This function is used during insertion of EntityNodes into the graph, it checks if the boundary of the RedactionEntity intersects or even contains the RedactionEntity.
* This function is used during insertion of EntityNodes into the graph. It checks whether the TextRange of this node intersects or even fully contains the TextRange of the TextEntity.
* It sets the fields accordingly and recursively calls this function on all its children.
*
* @param redactionEntity RedactionEntity, which is being inserted into the graph
* @param textEntity TextEntity, which is being inserted into the graph
*/
default void addThisToEntityIfIntersects(RedactionEntity redactionEntity) {
default void addThisToEntityIfIntersects(TextEntity textEntity) {
TextBlock textBlock = getTextBlock();
if (textBlock.getBoundary().intersects(redactionEntity.getBoundary())) {
if (textBlock.containsBoundary(redactionEntity.getBoundary())) {
redactionEntity.setDeepestFullyContainingNode(this);
if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
if (textBlock.containsTextRange(textEntity.getTextRange())) {
textEntity.setDeepestFullyContainingNode(this);
}
redactionEntity.addIntersectingNode(this);
streamChildren().filter(semanticNode -> semanticNode.getBoundary().intersects(redactionEntity.getBoundary()))
.forEach(node -> node.addThisToEntityIfIntersects(redactionEntity));
textEntity.addIntersectingNode(this);
streamChildren().filter(semanticNode -> semanticNode.getTextRange().intersects(textEntity.getTextRange()))
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
}
}
@ -505,13 +505,13 @@ public interface SemanticNode {
/**
* The Boundary is the start and end string offsets in the reading order of the document.
* The TextRange is the start and end string offsets in the reading order of the document.
*
* @return Boundary of this Node's TextBlock
* @return TextRange of this Node's TextBlock
*/
default Boundary getBoundary() {
default TextRange getTextRange() {
return getTextBlock().getBoundary();
return getTextBlock().getTextRange();
}
@ -522,7 +522,7 @@ public interface SemanticNode {
*/
default int length() {
return getBoundary().length();
return getTextRange().length();
}

View File

@ -11,7 +11,7 @@ import java.util.stream.IntStream;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
@ -38,7 +38,7 @@ public class Table implements SemanticNode {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
/**
@ -47,7 +47,7 @@ public class Table implements SemanticNode {
* @param strings Strings to check whether a row contains them
* @return Stream of all entities in this table, that appear in a row, which contains any of the provided strings
*/
public Stream<RedactionEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
public Stream<TextEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
return IntStream.range(0, numberOfRows)
.boxed()
@ -79,7 +79,7 @@ public class Table implements SemanticNode {
* @param value the string which the table cell should contain
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and the provided value.
*/
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
List<Integer> vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
@ -94,7 +94,7 @@ public class Table implements SemanticNode {
* @param values the strings which the table cell should contain
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and any provided value.
*/
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
List<Integer> colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
@ -109,12 +109,12 @@ public class Table implements SemanticNode {
* @param types type strings to check whether a row contains an entity like them
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
*/
public Stream<RedactionEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
List<Integer> rowsWithEntityOfType = getEntities().stream()
.filter(RedactionEntity::isActive)
.filter(TextEntity::active)
.filter(redactionEntity -> types.stream().anyMatch(type -> type.equals(redactionEntity.getType())))
.map(RedactionEntity::getIntersectingNodes)
.map(TextEntity::getIntersectingNodes)
.filter(node -> node instanceof TableCell)
.map(node -> (TableCell) node)
.map(TableCell::getRow)
@ -131,13 +131,13 @@ public class Table implements SemanticNode {
* @param types type strings to check whether a row contains an entity like them
* @return Stream of all entities in this table, that appear in a row, which contains no active entity with any of the provided types.
*/
public Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
public Stream<TextEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
return IntStream.range(0, numberOfRows)
.boxed()
.filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities)
.flatMap(Collection::stream)
.filter(RedactionEntity::isActive)
.filter(TextEntity::active)
.noneMatch(entity -> types.contains(entity.getType())))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
@ -290,12 +290,12 @@ public class Table implements SemanticNode {
* Ignores Entity with ignored == true or removed == true.
*
* @param type the type of entities to search for
* @param redactionEntity the entity, which appears in the row to search
* @param textEntity the entity, which appears in the row to search
* @return List of all entities of the provided type, which appear in the same row that the provided entity appears in.
*/
public List<RedactionEntity> getEntitiesOfTypeInSameRow(String type, RedactionEntity redactionEntity) {
public List<TextEntity> getEntitiesOfTypeInSameRow(String type, TextEntity textEntity) {
return redactionEntity.getIntersectingNodes()
return textEntity.getIntersectingNodes()
.stream()
.filter(node -> node instanceof TableCell)
.map(node -> (TableCell) node)

View File

@ -7,7 +7,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
@ -41,7 +41,7 @@ public class TableCell implements GenericSemanticNode {
@Builder.Default
@EqualsAndHashCode.Exclude
Set<RedactionEntity> entities = new HashSet<>();
Set<TextEntity> entities = new HashSet<>();
@Override

View File

@ -12,7 +12,7 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
@ -38,7 +38,7 @@ public class AtomicTextBlock implements TextBlock {
Page page;
//string coordinates
Boundary boundary;
TextRange textRange;
String searchText;
List<Integer> lineBreaks;
@ -61,7 +61,7 @@ public class AtomicTextBlock implements TextBlock {
return AtomicTextBlock.builder()
.id(textBlockIdx)
.boundary(new Boundary(stringOffset, stringOffset))
.textRange(new TextRange(stringOffset, stringOffset))
.searchText("")
.lineBreaks(Collections.emptyList())
.page(page)
@ -82,7 +82,7 @@ public class AtomicTextBlock implements TextBlock {
.id(atomicTextBlockData.getId())
.numberOnPage(atomicTextBlockData.getNumberOnPage())
.page(page)
.boundary(new Boundary(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
.textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
.searchText(atomicTextBlockData.getSearchText())
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed().toList())
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed().toList())
@ -98,20 +98,20 @@ public class AtomicTextBlock implements TextBlock {
}
public Boundary getLineBoundary(int lineNumber) {
public TextRange getLineTextRange(int lineNumber) {
if (lineNumber >= numberOfLines() || lineNumber < 0) {
return new Boundary(boundary.start(), boundary.start());
return new TextRange(textRange.start(), textRange.start());
}
if (numberOfLines() == 1) {
return boundary;
return textRange;
}
if (lineNumber == 0) {
return new Boundary(boundary.start(), lineBreaks.get(0) + boundary.start());
return new TextRange(textRange.start(), lineBreaks.get(0) + textRange.start());
} else if (lineNumber == numberOfLines() - 1) {
return new Boundary(lineBreaks.get(lineBreaks.size() - 1) + boundary.start(), boundary.end());
return new TextRange(lineBreaks.get(lineBreaks.size() - 1) + textRange.start(), textRange.end());
}
return new Boundary(lineBreaks.get(lineNumber - 1) + boundary.start(), lineBreaks.get(lineNumber) + boundary.start());
return new TextRange(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start());
}
@ -126,9 +126,9 @@ public class AtomicTextBlock implements TextBlock {
public int getNextLinebreak(int fromIndex) {
return lineBreaks.stream()//
.filter(linebreak -> linebreak > fromIndex - boundary.start()) //
.filter(linebreak -> linebreak > fromIndex - textRange.start()) //
.findFirst() //
.orElse(searchText.length()) + boundary.start();
.orElse(searchText.length()) + textRange.start();
}
@ -136,43 +136,43 @@ public class AtomicTextBlock implements TextBlock {
public int getPreviousLinebreak(int fromIndex) {
return lineBreaks.stream()//
.filter(linebreak -> linebreak <= fromIndex - boundary.start())//
.filter(linebreak -> linebreak <= fromIndex - textRange.start())//
.reduce((a, b) -> b)//
.orElse(0) + boundary.start();
.orElse(0) + textRange.start();
}
@Override
public Rectangle2D getPosition(int stringIdx) {
return positions.get(stringIdxToPositionIdx.get(stringIdx - boundary.start()));
return positions.get(stringIdxToPositionIdx.get(stringIdx - textRange.start()));
}
@Override
public List<Rectangle2D> getPositions(Boundary stringBoundary) {
public List<Rectangle2D> getPositions(TextRange stringTextRange) {
if (!containsBoundary(stringBoundary)) {
throw new IndexOutOfBoundsException(format("%s is out of bounds for %s", stringBoundary, this.boundary));
if (!containsTextRange(stringTextRange)) {
throw new IndexOutOfBoundsException(format("%s is out of bounds for %s", stringTextRange, this.textRange));
}
if (stringBoundary.length() == 0) {
if (stringTextRange.length() == 0) {
return Collections.emptyList();
}
int startPositionIdx = stringIdxToPositionIdx.get(stringBoundary.start() - this.boundary.start());
int startPositionIdx = stringIdxToPositionIdx.get(stringTextRange.start() - this.textRange.start());
if (stringBoundary.end() == this.boundary.end()) {
if (stringTextRange.end() == this.textRange.end()) {
return positions.subList(startPositionIdx, positions.size());
}
return positions.subList(startPositionIdx, stringIdxToPositionIdx.get(stringBoundary.end() - this.boundary.start()));
return positions.subList(startPositionIdx, stringIdxToPositionIdx.get(stringTextRange.end() - this.textRange.start()));
}
public Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary) {
public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
List<Rectangle2D> rectanglesPerLine = stringBoundary.split(getAllLineBreaksInBoundary(stringBoundary))
List<Rectangle2D> rectanglesPerLine = stringTextRange.split(getAllLineBreaksInBoundary(stringTextRange))
.stream()
.map(this::getPositions)
.map(RectangleTransformations::rectangleBBoxWithGaps)
@ -185,18 +185,18 @@ public class AtomicTextBlock implements TextBlock {
@Override
public String subSequenceWithLineBreaks(Boundary boundary) {
public String subSequenceWithLineBreaks(TextRange textRange) {
if (boundary.length() == 0 || !getBoundary().contains(boundary)) {
if (textRange.length() == 0 || !getTextRange().contains(textRange)) {
return "";
}
Set<Integer> lbInBoundary = lineBreaks.stream().map(i -> i + boundary.start()).filter(boundary::contains).collect(Collectors.toSet());
if (boundary.end() == getBoundary().end()) {
lbInBoundary.add(getBoundary().end());
Set<Integer> lbInBoundary = lineBreaks.stream().map(i -> i + textRange.start()).filter(textRange::contains).collect(Collectors.toSet());
if (textRange.end() == getTextRange().end()) {
lbInBoundary.add(getTextRange().end());
}
StringBuilder sb = new StringBuilder();
for (int i = boundary.start(); i < boundary.end(); i++) {
for (int i = textRange.start(); i < textRange.end(); i++) {
char character = this.charAt(i);
if (lbInBoundary.contains(i + 1)) {
// always plus one, due to the linebreaks being an exclusive end index
@ -215,9 +215,9 @@ public class AtomicTextBlock implements TextBlock {
}
private List<Integer> getAllLineBreaksInBoundary(Boundary boundary) {
private List<Integer> getAllLineBreaksInBoundary(TextRange textRange) {
return getLineBreaks().stream().map(linebreak -> linebreak + this.boundary.start()).filter(boundary::contains).toList();
return getLineBreaks().stream().map(linebreak -> linebreak + this.textRange.start()).filter(textRange::contains).toList();
}

View File

@ -10,7 +10,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import lombok.AccessLevel;
@ -23,7 +23,7 @@ public class ConcatenatedTextBlock implements TextBlock {
List<AtomicTextBlock> atomicTextBlocks;
String searchText;
Boundary boundary;
TextRange textRange;
public static ConcatenatedTextBlock empty() {
@ -36,12 +36,12 @@ public class ConcatenatedTextBlock implements TextBlock {
this.atomicTextBlocks = new LinkedList<>();
if (atomicTextBlocks.isEmpty()) {
boundary = new Boundary(-1, -1);
textRange = new TextRange(-1, -1);
return;
}
var firstTextBlock = atomicTextBlocks.get(0);
this.atomicTextBlocks.add(firstTextBlock);
boundary = new Boundary(firstTextBlock.getBoundary().start(), firstTextBlock.getBoundary().end());
textRange = new TextRange(firstTextBlock.getTextRange().start(), firstTextBlock.getTextRange().end());
atomicTextBlocks.subList(1, atomicTextBlocks.size()).forEach(this::concat);
}
@ -50,13 +50,13 @@ public class ConcatenatedTextBlock implements TextBlock {
public ConcatenatedTextBlock concat(TextBlock textBlock) {
if (this.atomicTextBlocks.isEmpty()) {
boundary.setStart(textBlock.getBoundary().start());
boundary.setEnd(textBlock.getBoundary().end());
} else if (boundary.end() != textBlock.getBoundary().start()) {
throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary()));
textRange.setStart(textBlock.getTextRange().start());
textRange.setEnd(textBlock.getTextRange().end());
} else if (textRange.end() != textBlock.getTextRange().start()) {
throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", textRange, textBlock.getTextRange()));
}
this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks());
boundary.setEnd(textBlock.getBoundary().end());
textRange.setEnd(textBlock.getTextRange().end());
this.searchText = null;
return this;
}
@ -64,13 +64,13 @@ public class ConcatenatedTextBlock implements TextBlock {
private AtomicTextBlock getAtomicTextBlockByStringIndex(int stringIdx) {
return atomicTextBlocks.stream().filter(textBlock -> textBlock.getBoundary().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new);
return atomicTextBlocks.stream().filter(textBlock -> textBlock.getTextRange().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new);
}
private List<AtomicTextBlock> getAllAtomicTextBlocksPartiallyInStringBoundary(Boundary boundary) {
private List<AtomicTextBlock> getAllAtomicTextBlocksPartiallyInStringBoundary(TextRange textRange) {
return atomicTextBlocks.stream().filter(tb -> tb.getBoundary().intersects(boundary)).toList();
return atomicTextBlocks.stream().filter(tb -> tb.getTextRange().intersects(textRange)).toList();
}
@ -121,99 +121,99 @@ public class ConcatenatedTextBlock implements TextBlock {
return getAtomicTextBlockByStringIndex(stringIdx).getPosition(stringIdx);
}
public Boundary getLineBoundary(int lineNumber) {
public TextRange getLineTextRange(int lineNumber) {
if (atomicTextBlocks.size() == 1) {
return atomicTextBlocks.get(0).getLineBoundary(lineNumber);
return atomicTextBlocks.get(0).getLineTextRange(lineNumber);
}
int lineNumberInCurrentBlock = lineNumber;
for (AtomicTextBlock atomicTextBlock : atomicTextBlocks) {
if (lineNumberInCurrentBlock < atomicTextBlock.numberOfLines()) {
return atomicTextBlock.getLineBoundary(lineNumberInCurrentBlock);
return atomicTextBlock.getLineTextRange(lineNumberInCurrentBlock);
}
lineNumberInCurrentBlock -= atomicTextBlock.numberOfLines();
}
return new Boundary(boundary.start(), boundary.start());
return new TextRange(textRange.start(), textRange.start());
}
@Override
public List<Rectangle2D> getPositions(Boundary stringBoundary) {
public List<Rectangle2D> getPositions(TextRange stringTextRange) {
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringBoundary);
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange);
if (textBlocks.isEmpty()) {
return Collections.emptyList();
}
if (textBlocks.size() == 1) {
return textBlocks.get(0).getPositions(stringBoundary);
return textBlocks.get(0).getPositions(stringTextRange);
}
AtomicTextBlock firstTextBlock = textBlocks.get(0);
List<Rectangle2D> positions = new LinkedList<>(firstTextBlock.getPositions(new Boundary(stringBoundary.start(), firstTextBlock.getBoundary().end())));
List<Rectangle2D> positions = new LinkedList<>(firstTextBlock.getPositions(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end())));
for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
positions.addAll(textBlock.getPositions());
}
var lastTextBlock = textBlocks.get(textBlocks.size() - 1);
positions.addAll(lastTextBlock.getPositions(new Boundary(lastTextBlock.getBoundary().start(), stringBoundary.end())));
positions.addAll(lastTextBlock.getPositions(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
return positions;
}
@Override
public Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary) {
public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringBoundary);
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange);
if (textBlocks.isEmpty()) {
return new HashMap<>();
}
if (textBlocks.size() == 1) {
return textBlocks.get(0).getPositionsPerPage(stringBoundary);
return textBlocks.get(0).getPositionsPerPage(stringTextRange);
}
AtomicTextBlock firstTextBlock = textBlocks.get(0);
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = firstTextBlock.getPositionsPerPage(new Boundary(stringBoundary.start(), firstTextBlock.getBoundary().end()));
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = firstTextBlock.getPositionsPerPage(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end()));
for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, textBlock.getPositionsPerPage(textBlock.getBoundary()));
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, textBlock.getPositionsPerPage(textBlock.getTextRange()));
}
AtomicTextBlock lastTextBlock = textBlocks.get(textBlocks.size() - 1);
rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage,
lastTextBlock.getPositionsPerPage(new Boundary(lastTextBlock.getBoundary().start(), stringBoundary.end())));
lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
return rectanglesPerLinePerPage;
}
@Override
public String subSequenceWithLineBreaks(Boundary boundary) {
public String subSequenceWithLineBreaks(TextRange textRange) {
if (boundary.length() == 0 || !getBoundary().contains(boundary)) {
if (textRange.length() == 0 || !getTextRange().contains(textRange)) {
return "";
}
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(boundary);
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(textRange);
if (textBlocks.size() == 1) {
return textBlocks.get(0).subSequenceWithLineBreaks(boundary);
return textBlocks.get(0).subSequenceWithLineBreaks(textRange);
}
StringBuilder sb = new StringBuilder();
AtomicTextBlock firstTextBlock = textBlocks.get(0);
sb.append(firstTextBlock.subSequenceWithLineBreaks(new Boundary(boundary.start(), firstTextBlock.getBoundary().end())));
sb.append(firstTextBlock.subSequenceWithLineBreaks(new TextRange(textRange.start(), firstTextBlock.getTextRange().end())));
for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
sb.append(textBlock.searchTextWithLineBreaks());
}
var lastTextBlock = textBlocks.get(textBlocks.size() - 1);
sb.append(lastTextBlock.subSequenceWithLineBreaks(new Boundary(lastTextBlock.getBoundary().start(), boundary.end())));
sb.append(lastTextBlock.subSequenceWithLineBreaks(new TextRange(lastTextBlock.getTextRange().start(), textRange.end())));
return sb.toString();
}

View File

@ -10,7 +10,7 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
@ -22,7 +22,7 @@ public interface TextBlock extends CharSequence {
List<AtomicTextBlock> getAtomicTextBlocks();
Boundary getBoundary();
TextRange getTextRange();
int getNextLinebreak(int fromIndex);
@ -31,7 +31,7 @@ public interface TextBlock extends CharSequence {
int getPreviousLinebreak(int fromIndex);
Boundary getLineBoundary(int lineNumber);
TextRange getLineTextRange(int lineNumber);
@ -41,13 +41,13 @@ public interface TextBlock extends CharSequence {
Rectangle2D getPosition(int stringIdx);
List<Rectangle2D> getPositions(Boundary stringBoundary);
List<Rectangle2D> getPositions(TextRange stringTextRange);
Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary);
Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange);
String subSequenceWithLineBreaks(Boundary boundary);
String subSequenceWithLineBreaks(TextRange textRange);
int numberOfLines();
@ -55,13 +55,13 @@ public interface TextBlock extends CharSequence {
default CharSequence getLine(int lineNumber) {
return subSequence(getLineBoundary(lineNumber));
return subSequence(getLineTextRange(lineNumber));
}
default List<Rectangle2D> getLinePositions(int lineNumber) {
return getPositions(getLineBoundary(lineNumber));
return getPositions(getLineTextRange(lineNumber));
}
@ -72,13 +72,13 @@ public interface TextBlock extends CharSequence {
/**
 * Returns the result of {@code subSequenceWithLineBreaks} applied to the complete range of this text block.
 *
 * @return the string produced by {@code subSequenceWithLineBreaks} for the whole block
 */
default String searchTextWithLineBreaks() {
return subSequenceWithLineBreaks(getBoundary());
return subSequenceWithLineBreaks(getTextRange());
}
/**
 * Searches for the first occurrence of the search term, starting at the start of this block's range.
 * Delegates to the two-argument {@code indexOf} overload.
 *
 * @param searchTerm the string to look for
 * @return the value returned by {@code indexOf(searchTerm, startOffset)} with the block's range start as offset
 */
default int indexOf(String searchTerm) {
return indexOf(searchTerm, getBoundary().start());
return indexOf(searchTerm, getTextRange().start());
}
@ -88,10 +88,10 @@ public interface TextBlock extends CharSequence {
}
/**
 * Collects the pages of all atomic text blocks whose range intersects the given range.
 *
 * @param textRange the range to test each atomic text block against
 * @return an unmodifiable set with the page of every intersecting atomic text block
 */
default Set<Page> getPages(Boundary boundary) {
default Set<Page> getPages(TextRange textRange) {
return getAtomicTextBlocks().stream()
.filter(atomicTextBlock -> atomicTextBlock.getBoundary().intersects(boundary))
.filter(atomicTextBlock -> atomicTextBlock.getTextRange().intersects(textRange))
.map(AtomicTextBlock::getPage)
.collect(Collectors.toUnmodifiableSet());
}
@ -99,38 +99,38 @@ public interface TextBlock extends CharSequence {
/**
 * Searches the search text of this block for the search term, beginning at the given offset.
 * The offset is reduced by the start of the block's range before searching, and a hit is shifted
 * back by the same amount, so both the argument and the result use the coordinates of the block's range.
 *
 * @param searchTerm the string to look for
 * @param startOffset the offset to start searching from, in the coordinates of the block's range
 * @return the index of the first hit in the coordinates of the block's range, or -1 if there is none
 */
default int indexOf(String searchTerm, int startOffset) {
int start = getSearchText().indexOf(searchTerm, startOffset - getBoundary().start());
int start = getSearchText().indexOf(searchTerm, startOffset - getTextRange().start());
if (start == -1) {
return -1;
}
return start + getBoundary().start();
return start + getTextRange().start();
}
/**
 * Returns the characters from the start of this block's range up to the index reported by
 * {@code getNextLinebreak} for that start.
 *
 * @return the sub sequence between the range start and the next line break after it
 */
default CharSequence getFirstLine() {
return subSequence(getBoundary().start(), getNextLinebreak(getBoundary().start()));
return subSequence(getTextRange().start(), getNextLinebreak(getTextRange().start()));
}
/**
 * Checks whether the range of this text block contains the given range.
 *
 * @param textRange the range to test
 * @return the result of {@code contains} on this block's range for the given range
 * @throws IllegalArgumentException if the end of the given range is smaller than its start
 */
default boolean containsBoundary(Boundary boundary) {
default boolean containsTextRange(TextRange textRange) {
// NOTE(review): the message says the start must be smaller than the end, but the check only rejects
// end < start, so a range with equal start and end passes - confirm which is intended.
if (boundary.end() < boundary.start()) {
throw new IllegalArgumentException(format("Invalid %s, StartIndex must be smaller than EndIndex", boundary));
if (textRange.end() < textRange.start()) {
throw new IllegalArgumentException(format("Invalid %s, StartIndex must be smaller than EndIndex", textRange));
}
return getBoundary().contains(boundary);
return getTextRange().contains(textRange);
}
/**
 * Checks whether the range of this text block contains the given index.
 *
 * @param stringIndex the index to test
 * @return the result of {@code contains} on this block's range for the given index
 */
default boolean containsIndex(int stringIndex) {
return getBoundary().contains(stringIndex);
return getTextRange().contains(stringIndex);
}
/**
 * Returns the characters covered by the given range.
 * Delegates to {@code subSequence(int, int)} with the start and end of the range.
 *
 * @param textRange the range to extract
 * @return the sub sequence between the start and the end of the range
 */
default CharSequence subSequence(Boundary boundary) {
default CharSequence subSequence(TextRange textRange) {
return subSequence(boundary.start(), boundary.end());
return subSequence(textRange.start(), textRange.end());
}
@ -147,21 +147,21 @@ public interface TextBlock extends CharSequence {
/**
 * Returns a substring of the search text of this block.
 * Both indices are reduced by the start of the block's range before they are applied to the search text.
 *
 * @param start the start index, in the coordinates of the block's range
 * @param end the end index, in the coordinates of the block's range
 * @return the substring of the search text between the two shifted indices
 */
@Override
default CharSequence subSequence(int start, int end) {
return getSearchText().substring(start - getBoundary().start(), end - getBoundary().start());
return getSearchText().substring(start - getTextRange().start(), end - getTextRange().start());
}
/**
 * Returns the length reported by the range of this text block.
 *
 * @return the length of the block's range
 */
@Override
default int length() {
return getBoundary().length();
return getTextRange().length();
}
/**
 * Returns one character of the search text of this block.
 * The index is reduced by the start of the block's range before it is applied to the search text.
 * NOTE(review): this means callers presumably pass offsets in the coordinates of the block's range
 * rather than zero-based indices - confirm against callers that treat this as a plain CharSequence.
 *
 * @param index the index of the character, in the coordinates of the block's range
 * @return the character of the search text at the shifted index
 */
@Override
default char charAt(int index) {
return getSearchText().charAt(index - getBoundary().start());
return getSearchText().charAt(index - getTextRange().start());
}
}

View File

@ -18,12 +18,13 @@ import org.kie.api.runtime.KieSession;
import com.google.common.base.Functions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.ConsecutiveBoundaryCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
@ -55,28 +56,28 @@ public class EntityCreationService {
}
/**
 * Creates entities for the text between occurrences of a start string and a stop string.
 * Both strings are located in the text block of the node with
 * {@code RedactionSearchUtility.findBoundariesByString}; the resulting range lists are handed to
 * {@code betweenBoundaries}.
 *
 * @param start the string marking where a candidate begins
 * @param stop the string marking where a candidate ends
 * @param type the type passed on to {@code betweenBoundaries}
 * @param entityType the entity type passed on to {@code betweenBoundaries}
 * @param node the semantic node whose text block is searched
 * @return the stream returned by {@code betweenBoundaries}
 */
public Stream<RedactionEntity> betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
}
/**
 * Variant of {@code betweenStrings} that locates the start and stop strings with
 * {@code RedactionSearchUtility.findBoundariesByStringIgnoreCase}.
 * The resulting range lists are handed to {@code betweenBoundaries}.
 *
 * @param start the string marking where a candidate begins
 * @param stop the string marking where a candidate ends
 * @param type the type passed on to {@code betweenBoundaries}
 * @param entityType the entity type passed on to {@code betweenBoundaries}
 * @param node the semantic node whose text block is searched
 * @return the stream returned by {@code betweenBoundaries}
 */
public Stream<RedactionEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
}
public Stream<RedactionEntity> betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
startBoundaries.forEach(boundary -> {
boundary.setStart(boundary.start() - start.length());
@ -87,10 +88,10 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
startBoundaries.forEach(boundary -> {
boundary.setStart(boundary.start() - start.length());
@ -101,10 +102,10 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
stopBoundaries.forEach(boundary -> {
boundary.setStart(boundary.start() + stop.length());
@ -115,10 +116,10 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
stopBoundaries.forEach(boundary -> {
boundary.setStart(boundary.start() + stop.length());
@ -129,10 +130,10 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
startBoundaries.forEach(boundary -> {
boundary.setStart(boundary.start() - start.length());
@ -147,10 +148,10 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
startBoundaries.forEach(boundary -> {
boundary.setStart(boundary.start() - start.length());
@ -165,32 +166,32 @@ public class EntityCreationService {
}
/**
 * Creates entities for the text between matches of a start regex and a stop regex.
 * Both patterns are evaluated on the text block of the node with
 * {@code RedactionSearchUtility.findBoundariesByRegex}; the resulting range lists are handed to
 * {@code betweenBoundaries}.
 *
 * @param regexStart the pattern marking where a candidate begins
 * @param regexStop the pattern marking where a candidate ends
 * @param type the type passed on to {@code betweenBoundaries}
 * @param entityType the entity type passed on to {@code betweenBoundaries}
 * @param node the semantic node whose text block is searched
 * @return the stream returned by {@code betweenBoundaries}
 */
public Stream<RedactionEntity> betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStart, textBlock);
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStop, textBlock);
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStart, textBlock);
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByRegex(regexStop, textBlock);
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
}
/**
 * Variant of {@code betweenRegexes} that evaluates both patterns with
 * {@code RedactionSearchUtility.findBoundariesByRegexIgnoreCase}.
 * The resulting range lists are handed to {@code betweenBoundaries}.
 *
 * @param regexStart the pattern marking where a candidate begins
 * @param regexStop the pattern marking where a candidate ends
 * @param type the type passed on to {@code betweenBoundaries}
 * @param entityType the entity type passed on to {@code betweenBoundaries}
 * @param node the semantic node whose text block is searched
 * @return the stream returned by {@code betweenBoundaries}
 */
public Stream<RedactionEntity> betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
// The literal 0 sits in the argument position where the byRegexIgnoreCase overload of this class passes its
// group parameter - presumably it selects the whole match; confirm against RedactionSearchUtility.
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStart, 0, textBlock);
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStop, 0, textBlock);
List<TextRange> startBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStart, 0, textBlock);
List<TextRange> stopBoundaries = RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexStop, 0, textBlock);
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
}
public Stream<RedactionEntity> betweenBoundaries(List<Boundary> startBoundaries, List<Boundary> stopBoundaries, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> betweenBoundaries(List<TextRange> startBoundaries, List<TextRange> stopBoundaries, String type, EntityType entityType, SemanticNode node) {
if (startBoundaries.isEmpty() || stopBoundaries.isEmpty()) {
return Stream.empty();
}
List<Boundary> entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries);
List<TextRange> entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries);
return entityBoundaries.stream()
.map(boundary -> boundary.trim(node.getTextBlock()))
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
@ -200,23 +201,23 @@ public class EntityCreationService {
}
/**
 * Pairs every start range with the closest stop range behind it and returns the gaps between them.
 * For each start range, the stop range with the smallest start among those that begin strictly after
 * the end of the start range is chosen. The produced range reaches from the end of the start range to
 * the start of that stop range. The collected ranges are passed through
 * {@code removeOuterOverlappingBoundaries} before being returned.
 *
 * @param startBoundaries the ranges marking where a candidate begins
 * @param stopBoundaries the ranges marking where a candidate ends
 * @return the result of {@code removeOuterOverlappingBoundaries} on the collected ranges
 */
private static List<Boundary> findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(List<Boundary> startBoundaries, List<Boundary> stopBoundaries) {
private static List<TextRange> findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(List<TextRange> startBoundaries, List<TextRange> stopBoundaries) {
List<Boundary> entityBoundaries = new LinkedList<>();
for (Boundary startBoundary : startBoundaries) {
Optional<Boundary> optionalStopBoundaryWithMinimalDistance = stopBoundaries.stream()
.filter(stopBoundary -> stopBoundary.start() > startBoundary.end())
.min(Comparator.comparingInt(Boundary::start));
List<TextRange> entityBoundaries = new LinkedList<>();
for (TextRange startTextRange : startBoundaries) {
Optional<TextRange> optionalStopBoundaryWithMinimalDistance = stopBoundaries.stream()
.filter(stopBoundary -> stopBoundary.start() > startTextRange.end())
.min(Comparator.comparingInt(TextRange::start));
// NOTE(review): break abandons all remaining start ranges as soon as one has no stop range behind it.
// That is only equivalent to skipping if startBoundaries is sorted ascending - confirm with the callers.
if (optionalStopBoundaryWithMinimalDistance.isEmpty()) {
break;
}
entityBoundaries.add(new Boundary(startBoundary.end(), optionalStopBoundaryWithMinimalDistance.get().start()));
entityBoundaries.add(new TextRange(startTextRange.end(), optionalStopBoundaryWithMinimalDistance.get().start()));
}
return removeOuterOverlappingBoundaries(entityBoundaries);
}
private static List<Boundary> removeOuterOverlappingBoundaries(List<Boundary> entityBoundaries) {
private static List<TextRange> removeOuterOverlappingBoundaries(List<TextRange> entityBoundaries) {
/*
In some cases we get boundaries where one contains the other. This happens, for example, when we have two start boundaries and one stop boundary after the two start boundaries.
Then we get two boundaries where one is entirely contained in the other. So we want to remove the outer boundary.
@ -230,9 +231,9 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
.stream()
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
.map(bounds -> byBoundary(bounds, type, entityType, node))
@ -241,11 +242,11 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, false);
return searchImplementation.getBoundaries(textBlock, node.getBoundary())
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
.stream()
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
@ -255,11 +256,11 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> lineAfterStringsIgnoreCase(List<String> strings, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> lineAfterStringsIgnoreCase(List<String> strings, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, true);
return searchImplementation.getBoundaries(textBlock, node.getBoundary())
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
.stream()
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
@ -269,7 +270,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findBoundariesByString(string, textBlock)
@ -282,7 +283,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findBoundariesByStringIgnoreCase(string, textBlock)
@ -295,7 +296,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) {
public Stream<TextEntity> lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) {
return tableNode.streamTableCells()
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findBoundariesByString(string, tableCell.getTextBlock()),
@ -306,7 +307,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) {
public Stream<TextEntity> lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) {
return tableNode.streamTableCells()
.flatMap(tableCell -> lineAfterBoundariesAcrossColumns(RedactionSearchUtility.findBoundariesByStringIgnoreCase(string, tableCell.getTextBlock()),
@ -327,7 +328,7 @@ public class EntityCreationService {
* @param tableNode the table node
* @return a stream of RedactionEntities
*/
private Stream<RedactionEntity> lineAfterBoundariesAcrossColumns(List<Boundary> boundaries, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> boundaries, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
return boundaries.stream()
.map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary)))
@ -344,13 +345,13 @@ public class EntityCreationService {
}
public Optional<RedactionEntity> semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) {
public Optional<TextEntity> semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) {
var textBlock = semanticNode.getTextBlock();
int startIndex = Math.min(textBlock.indexOf(string), 0);
var boundary = new Boundary(startIndex, semanticNode.getBoundary().end());
var boundary = new TextRange(startIndex, semanticNode.getTextRange().end());
if (boundary.length() > 0) {
boundary = new Boundary(boundary.start(), boundary.end() - 1);
boundary = new TextRange(boundary.start(), boundary.end() - 1);
}
if (!isValidEntityBoundary(textBlock, boundary)) {
return Optional.empty();
@ -359,31 +360,31 @@ public class EntityCreationService {
}
/**
 * Convenience overload that delegates to the five-argument {@code byRegexWithLineBreaks} with group 0.
 *
 * @param regexPattern the pattern passed on to the five-argument overload
 * @param type the type passed on to the five-argument overload
 * @param entityType the entity type passed on to the five-argument overload
 * @param node the semantic node passed on to the five-argument overload
 * @return the stream returned by the five-argument overload
 */
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node);
}
/**
 * Convenience overload that delegates to the five-argument {@code byRegexWithLineBreaksIgnoreCase} with group 0.
 *
 * @param regexPattern the pattern passed on to the five-argument overload
 * @param type the type passed on to the five-argument overload
 * @param entityType the entity type passed on to the five-argument overload
 * @param node the semantic node passed on to the five-argument overload
 * @return the stream returned by the five-argument overload
 */
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node);
}
/**
 * Convenience overload that delegates to the five-argument {@code byRegex} with group 0.
 *
 * @param regexPattern the pattern passed on to the five-argument overload
 * @param type the type passed on to the five-argument overload
 * @param entityType the entity type passed on to the five-argument overload
 * @param node the semantic node passed on to the five-argument overload
 * @return the stream returned by the five-argument overload
 */
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegex(regexPattern, type, entityType, 0, node);
}
/**
 * Convenience overload that delegates to the five-argument {@code byRegexIgnoreCase} with group 0.
 *
 * @param regexPattern the pattern passed on to the five-argument overload
 * @param type the type passed on to the five-argument overload
 * @param entityType the entity type passed on to the five-argument overload
 * @param node the semantic node passed on to the five-argument overload
 * @return the stream returned by the five-argument overload
 */
public Stream<RedactionEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegexIgnoreCase(regexPattern, type, entityType, 0, node);
}
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
public Stream<TextEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock())
.stream()
@ -393,7 +394,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
public Stream<TextEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock())
.stream()
@ -403,7 +404,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
public Stream<TextEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock())
.stream()
@ -413,7 +414,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
public Stream<TextEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexPattern, group, node.getTextBlock())
.stream()
@ -423,7 +424,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock())
.stream()
@ -433,7 +434,7 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByStringIgnoreCase(keyword, node.getTextBlock())
.stream()
@ -443,16 +444,16 @@ public class EntityCreationService {
}
/**
 * Creates one entity per paragraph below the given node.
 * Streams all sub nodes of type {@code NodeType.PARAGRAPH}, calls {@code bySemanticNode} for each of
 * them and keeps only the results that are present.
 *
 * @param node the node whose paragraph sub nodes are processed
 * @param type the type passed on to {@code bySemanticNode}
 * @param entityType the entity type passed on to {@code bySemanticNode}
 * @return a stream of the entities that {@code bySemanticNode} produced
 */
public Stream<RedactionEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
public Stream<TextEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get);
}
public Stream<RedactionEntity> bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) {
public Stream<TextEntity> bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) {
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
.map(SemanticNode::getBoundary)
.map(SemanticNode::getTextRange)
.collect(new ConsecutiveBoundaryCollector())
.stream()
.map(boundary -> byBoundary(boundary, type, entityType, node))
@ -461,42 +462,42 @@ public class EntityCreationService {
}
/**
 * Creates an entity covering the rest of a node after a given string.
 * If the node does not contain the string, nothing is created. Otherwise the range starts directly
 * behind the position that {@code indexOf} reports for the string in the node's text block and ends
 * at the end of the node's range; it is handed to {@code byBoundary}.
 *
 * @param string the string after which the entity starts
 * @param type the type passed on to {@code byBoundary}
 * @param entityType the entity type passed on to {@code byBoundary}
 * @param node the semantic node to search and to associate the entity with
 * @return an empty Optional if the node does not contain the string, otherwise the result of {@code byBoundary}
 */
public Optional<RedactionEntity> semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) {
public Optional<TextEntity> semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) {
if (!node.containsString(string)) {
return Optional.empty();
}
Boundary boundary = new Boundary(node.getTextBlock().indexOf(string) + string.length(), node.getBoundary().end());
return byBoundary(boundary, type, entityType, node);
TextRange textRange = new TextRange(node.getTextBlock().indexOf(string) + string.length(), node.getTextRange().end());
return byBoundary(textRange, type, entityType, node);
}
/**
 * Creates an entity covering the text block of the given node.
 * Starts from the range of the node's text block, shortens a non-empty range by one at its end,
 * rejects it if {@code isValidEntityBoundary} fails and otherwise hands it to {@code byBoundary}.
 *
 * @param node the semantic node whose text block is covered
 * @param type the type passed on to {@code byBoundary}
 * @param entityType the entity type passed on to {@code byBoundary}
 * @return an empty Optional if the range is not a valid entity range, otherwise the result of {@code byBoundary}
 */
public Optional<RedactionEntity> bySemanticNode(SemanticNode node, String type, EntityType entityType) {
Boundary boundary = node.getTextBlock().getBoundary();
TextRange textRange = node.getTextBlock().getTextRange();
// The end is moved back by one for non-empty ranges - presumably to drop a trailing separator
// character of the text block; TODO confirm.
if (boundary.length() > 0) {
boundary = new Boundary(boundary.start(), boundary.end() - 1);
if (textRange.length() > 0) {
textRange = new TextRange(textRange.start(), textRange.end() - 1);
}
if (!isValidEntityBoundary(node.getTextBlock(), boundary)) {
if (!isValidEntityBoundary(node.getTextBlock(), textRange)) {
return Optional.empty();
}
return byBoundary(boundary, type, entityType, node);
return byBoundary(textRange, type, entityType, node);
}
/**
 * Creates an entity from an existing one with its start replaced by an expanded start.
 * The new start comes from {@code RedactionSearchUtility.getExpandedStartByRegex}; the end, type and
 * entity type are taken from the given entity, and the entity's deepest fully containing node is used
 * as the node for {@code byBoundary}.
 *
 * @param entity the entity to expand
 * @param regexPattern the pattern passed on to {@code getExpandedStartByRegex}
 * @return the result of {@code byBoundary} for the expanded range
 */
public Optional<RedactionEntity> byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
public Optional<TextEntity> byPrefixExpansionRegex(TextEntity entity, String regexPattern) {
int expandedStart = RedactionSearchUtility.getExpandedStartByRegex(entity, regexPattern);
return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
return byBoundary(new TextRange(expandedStart, entity.getTextRange().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
}
/**
 * Creates an entity from an existing one with its end replaced by an expanded end.
 * The new end comes from {@code RedactionSearchUtility.getExpandedEndByRegex} and is then passed
 * through {@code truncateEndIfLineBreakIsBetween} together with the original end and the text block of
 * the entity's deepest fully containing node. The start, type and entity type are taken from the given
 * entity, and that same node is used as the node for {@code byBoundary}.
 *
 * @param entity the entity to expand
 * @param regexPattern the pattern passed on to {@code getExpandedEndByRegex}
 * @return the result of {@code byBoundary} for the expanded range
 */
public Optional<RedactionEntity> bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
public Optional<TextEntity> bySuffixExpansionRegex(TextEntity entity, String regexPattern) {
int expandedEnd = RedactionSearchUtility.getExpandedEndByRegex(entity, regexPattern);
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
return byBoundary(new Boundary(entity.getBoundary().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getTextRange().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
return byBoundary(new TextRange(entity.getTextRange().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
}
@ -514,19 +515,19 @@ public class EntityCreationService {
* If the document already contains an equal redaction entity, then the original Entity is returned.
* Also inserts the Entity into the kieSession.
*
* @param boundary The boundary of the redaction entity.
* @param textRange The text range of the redaction entity.
* @param type The type of the redaction entity.
* @param entityType The entity type of the redaction entity.
* @param node The semantic node to associate with the redaction entity.
* @return An Optional containing the redaction entity, or the previous entity if the entity already exists.
*/
public Optional<RedactionEntity> byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
public Optional<TextEntity> byBoundary(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
if (!node.getBoundary().contains(boundary)) {
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", boundary, node.getBoundary(), node));
if (!node.getTextRange().contains(textRange)) {
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
}
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
if (node.getEntities().contains(entity)) {
return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngine(Engine.RULE)).findAny();
}
@ -537,16 +538,16 @@ public class EntityCreationService {
}
/**
 * Creates an entity for the given range and adds it to the graph unconditionally.
 * The range is trimmed against the text block of the node, an initial entity is created from it and
 * passed to {@code addEntityToGraph}. This method itself neither checks that the node's range
 * contains the given range nor calls {@code insertToKieSession}.
 *
 * @param textRange the range of the entity before trimming
 * @param type the type of the entity
 * @param entityType the entity type of the entity
 * @param node the semantic node whose text block is used for trimming and to which the entity is added
 * @return the newly created entity
 */
public RedactionEntity forceByBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
public TextEntity forceByBoundary(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
addEntityToGraph(entity, node);
return entity;
}
public RedactionEntity mergeEntitiesOfSameType(List<RedactionEntity> entitiesToMerge, String type, EntityType entityType, SemanticNode node) {
public TextEntity mergeEntitiesOfSameType(List<TextEntity> entitiesToMerge, String type, EntityType entityType, SemanticNode node) {
if (!allEntitiesIntersectAndHaveSameTypes(entitiesToMerge)) {
throw new IllegalArgumentException("Provided entities can not be merged, since they do not intersect or are not the same type!" + entitiesToMerge);
@ -558,14 +559,17 @@ public class EntityCreationService {
return entitiesToMerge.get(0);
}
RedactionEntity mergedEntity = RedactionEntity.initialEntityNode(Boundary.merge(entitiesToMerge.stream().map(RedactionEntity::getBoundary).toList()), type, entityType);
TextEntity mergedEntity = TextEntity.initialEntityNode(TextRange.merge(entitiesToMerge.stream().map(TextEntity::getTextRange).toList()), type, entityType);
mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet()));
entitiesToMerge.stream().map(RedactionEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
entitiesToMerge.stream().map(TextEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
entitiesToMerge.stream()
.map(TextEntity::getManualOverwrite)
.map(ManualChangeOverwrite::getManualChangeLog)
.flatMap(Collection::stream)
.forEach(manualChange -> mergedEntity.getManualOverwrite().addChange(manualChange));
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDictionaryEntry));
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDossierDictionaryEntry));
mergedEntity.setIgnored(entitiesToMerge.stream().allMatch(RedactionEntity::isIgnored));
mergedEntity.setRemoved(entitiesToMerge.stream().allMatch(RedactionEntity::isRemoved));
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDictionaryEntry));
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(TextEntity::isDossierDictionaryEntry));
addEntityToGraph(mergedEntity, node);
insertToKieSession(mergedEntity);
@ -573,28 +577,27 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> byEntities(List<RedactionEntity> entities, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> copyEntities(List<TextEntity> entities, String type, EntityType entityType, SemanticNode node) {
return entities.stream().map(entity -> byEntity(type, entityType, node, entity));
return entities.stream().map(entity -> copyEntity(entity, type, entityType, node));
}
private RedactionEntity byEntity(String type, EntityType entityType, SemanticNode node, RedactionEntity entity) {
public TextEntity copyEntity(TextEntity entity, String type, EntityType entityType, SemanticNode node) {
RedactionEntity newEntity = RedactionEntity.initialEntityNode(entity.getBoundary(), type, entityType);
TextEntity newEntity = TextEntity.initialEntityNode(entity.getTextRange(), type, entityType);
newEntity.addEngines(entity.getEngines());
newEntity.addMatchedRules(entity.getMatchedRuleList());
newEntity.getManualOverwrite().addChanges(entity.getManualOverwrite().getManualChangeLog());
newEntity.setDictionaryEntry(entity.isDictionaryEntry());
newEntity.setDossierDictionaryEntry(entity.isDossierDictionaryEntry());
newEntity.setIgnored(entity.isIgnored());
newEntity.setRemoved(entity.isRemoved());
addEntityToGraph(newEntity, node);
insertToKieSession(newEntity);
return newEntity;
}
private void insertToKieSession(RedactionEntity mergedEntity) {
public void insertToKieSession(TextEntity mergedEntity) {
if (kieSession != null) {
kieSession.insert(mergedEntity);
@ -602,25 +605,25 @@ public class EntityCreationService {
}
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
var entity = forceByBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode);
var entity = forceByBoundary(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode);
entity.addEngine(Engine.NER);
insertToKieSession(entity);
return entity;
}
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
var entity = forceByBoundary(nerEntity.boundary(), type, entityType, semanticNode);
var entity = forceByBoundary(nerEntity.textRange(), type, entityType, semanticNode);
entity.addEngine(Engine.NER);
insertToKieSession(entity);
return entity;
}
public Stream<RedactionEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {
public Stream<TextEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {
return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> forceByBoundary(boundary, type, entityType, semanticNode))
@ -629,28 +632,28 @@ public class EntityCreationService {
}
public RedactionEntity byTableCellAsHighlight(TableCell tableCell, String type, EntityType entityType) {
public TextEntity byTableCellAsHighlight(TableCell tableCell, String type, EntityType entityType) {
RedactionEntity highlightEntity = RedactionEntity.initialEntityNode(new Boundary(tableCell.getBoundary().start(), tableCell.getBoundary().start()), type, entityType);
TextEntity highlightEntity = TextEntity.initialEntityNode(new TextRange(tableCell.getTextRange().start(), tableCell.getTextRange().start()), type, entityType);
String positionId = IdBuilder.buildId(tableCell.getBBox().keySet(), tableCell.getBBox().values().stream().toList(), type, entityType.name());
highlightEntity.setRedactionPositionsPerPage(tableCell.getBBox()
highlightEntity.setPositionsOnPagePerPage(tableCell.getBBox()
.entrySet()
.stream()
.map(entry -> new RedactionPosition(positionId, entry.getKey(), List.of(entry.getValue())))
.map(entry -> new PositionOnPage(positionId, entry.getKey(), List.of(entry.getValue())))
.toList());
addEntityToGraph(highlightEntity, tableCell);
return highlightEntity;
}
public boolean isValidEntityBoundary(TextBlock textBlock, Boundary boundary) {
public boolean isValidEntityBoundary(TextBlock textBlock, TextRange textRange) {
return boundary.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, boundary);
return textRange.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, textRange);
}
public void addEntityToGraph(RedactionEntity entity, SemanticNode node) {
public void addEntityToGraph(TextEntity entity, SemanticNode node) {
DocumentTree documentTree = node.getDocumentTree();
try {
@ -667,10 +670,10 @@ public class EntityCreationService {
}
private void addEntityToGraph(RedactionEntity entity, DocumentTree documentTree) {
private void addEntityToGraph(TextEntity entity, DocumentTree documentTree) {
SemanticNode containingNode = documentTree.childNodes(Collections.emptyList())
.filter(node -> node.getTextBlock().containsBoundary(entity.getBoundary()))
.filter(node -> node.getTextBlock().containsTextRange(entity.getTextRange()))
.findFirst()
.orElseThrow(() -> new NoSuchElementException("No containing Node found!"));
@ -684,30 +687,30 @@ public class EntityCreationService {
}
private static void addToPages(RedactionEntity entity) {
private static void addToPages(TextEntity entity) {
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getBoundary());
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
entity.getPages().addAll(pages);
pages.forEach(page -> page.getEntities().add(entity));
}
private static void addEntityToNodeEntitySets(RedactionEntity entity) {
private static void addEntityToNodeEntitySets(TextEntity entity) {
entity.getIntersectingNodes().forEach(node -> node.getEntities().add(entity));
}
private static boolean allEntitiesIntersectAndHaveSameTypes(List<RedactionEntity> entitiesToMerge) {
private static boolean allEntitiesIntersectAndHaveSameTypes(List<TextEntity> entitiesToMerge) {
if (entitiesToMerge.isEmpty()) {
return true;
}
RedactionEntity previousEntity = entitiesToMerge.get(0);
for (RedactionEntity redactionEntity : entitiesToMerge.subList(1, entitiesToMerge.size())) {
boolean typeMatches = redactionEntity.getType().equals(previousEntity.getType());
boolean entityTypeMatches = redactionEntity.getEntityType().equals(previousEntity.getEntityType());
boolean intersects = redactionEntity.intersects(previousEntity);
TextEntity previousEntity = entitiesToMerge.get(0);
for (TextEntity textEntity : entitiesToMerge.subList(1, entitiesToMerge.size())) {
boolean typeMatches = textEntity.getType().equals(previousEntity.getType());
boolean entityTypeMatches = textEntity.getEntityType().equals(previousEntity.getEntityType());
boolean intersects = textEntity.intersects(previousEntity);
if (!typeMatches || !entityTypeMatches || !intersects) {
return false;
}
@ -716,9 +719,9 @@ public class EntityCreationService {
}
private static Boundary toLineAfterBoundary(TextBlock textBlock, Boundary boundary) {
private static TextRange toLineAfterBoundary(TextBlock textBlock, TextRange textRange) {
return new Boundary(boundary.end(), textBlock.getNextLinebreak(boundary.end())).trim(textBlock);
return new TextRange(textRange.end(), textBlock.getNextLinebreak(textRange.end())).trim(textBlock);
}
}

View File

@ -6,7 +6,7 @@ import java.util.Objects;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
@ -19,17 +19,17 @@ public class EntityEnrichmentService {
private final RedactionServiceSettings redactionServiceSettings;
public void enrichEntity(RedactionEntity entity, TextBlock textBlock) {
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
entity.setValue(textBlock.subSequence(entity.getBoundary()).toString());
entity.setTextAfter(findTextAfter(entity.getBoundary().end(), textBlock));
entity.setTextBefore(findTextBefore(entity.getBoundary().start(), textBlock));
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
}
private String findTextAfter(int index, TextBlock textBlock) {
int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getBoundary().end());
int endOffset = Math.min(index + redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().end());
String textAfter = textBlock.subSequence(index, endOffset).toString();
if (!textAfter.isBlank()) {
List<String> wordsAfter = splitToWordsAndRemoveEmptyWords(textAfter);
@ -44,7 +44,7 @@ public class EntityEnrichmentService {
private String findTextBefore(int index, TextBlock textBlock) {
int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getBoundary().start());
int offsetBefore = Math.max(index - redactionServiceSettings.getSurroundingWordsOffsetWindow(), textBlock.getTextRange().start());
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
if (!textBefore.isBlank()) {
List<String> wordsBefore = splitToWordsAndRemoveEmptyWords(textBefore);

View File

@ -0,0 +1,95 @@
package com.iqser.red.service.redaction.v1.server.document.services;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import lombok.RequiredArgsConstructor;
/**
 * Applies manual changes (recategorizations and resizes) to entities and images of the document graph.
 * <p>
 * Text entities are (re-)inserted into the graph through the injected {@link EntityCreationService}.
 */
@RequiredArgsConstructor
public class ManualChangesApplicationService {

    private final EntityCreationService entityCreationService;


    /**
     * Applies a manual recategorization to the given entity.
     * <p>
     * For an {@link Image} only the image type is replaced. For a {@link TextEntity} a copy with the new type is
     * created and inserted into the graph, and the original entity is removed from the graph. Any other
     * {@link Entity} implementation is left untouched.
     *
     * @param entityToBeReCategorized     the image or text entity to recategorize
     * @param manualImageRecategorization the manual change carrying the new type
     */
    public void recategorize(Entity entityToBeReCategorized, ManualImageRecategorization manualImageRecategorization) {

        if (entityToBeReCategorized instanceof Image image) {
            image.setImageType(ImageType.fromString(manualImageRecategorization.getType()));
            return;
        }

        // need to create a new entity and copy over all values, since type is part of the primary key for entities and should never be changed!
        if (entityToBeReCategorized instanceof TextEntity textEntity) {
            TextEntity recategorizedEntity = entityCreationService.copyEntity(textEntity,
                    manualImageRecategorization.getType(),
                    textEntity.getEntityType(),
                    textEntity.getDeepestFullyContainingNode());
            recategorizedEntity.setPositionsOnPagePerPage(textEntity.getPositionsOnPagePerPage());
            recategorizedEntity.getManualOverwrite().addChange(manualImageRecategorization);
            textEntity.removeFromGraph();
        }
    }


    /**
     * Resizes a text entity according to the given manual resize and re-inserts it into the graph.
     * <p>
     * The position whose id equals the annotation id of the resize gets the new rectangles. The new start offset is
     * derived by locating the smaller of the two values (old entity value / resized value) inside the larger one.
     *
     * @param entityToBeResized     the entity whose text range and position are updated in place
     * @param manualResizeRedaction the manual resize carrying the annotation id, the new value and the new positions
     * @throws NoSuchElementException if the entity has no position with the annotation id of the resize
     */
    public void resizeEntityAndReinsert(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) {

        PositionOnPage positionOnPageToBeResized = entityToBeResized.getPositionsOnPagePerPage()
                .stream()
                .filter(redactionPosition -> redactionPosition.getId().equals(manualResizeRedaction.getAnnotationId()))
                .findFirst()
                .orElseThrow(() -> new NoSuchElementException("No redaction position with matching annotation id found!"));

        positionOnPageToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions().stream().map(ManualChangesApplicationService::toRectangle2D).toList());

        // NOTE(review): indexOf returns -1 when neither value contains the other, which silently shifts the offset by one - confirm callers guarantee containment.
        int newStartOffset;
        if (manualResizeRedaction.getValue().length() > entityToBeResized.getValue().length()) {
            // entity grew: the old value sits somewhere inside the new value, so the start moves to the left
            newStartOffset = entityToBeResized.getTextRange().start() - manualResizeRedaction.getValue().indexOf(entityToBeResized.getValue());
        } else {
            // entity shrank (or kept its length): the new value sits somewhere inside the old value, so the start moves to the right
            newStartOffset = entityToBeResized.getTextRange().start() + entityToBeResized.getValue().indexOf(manualResizeRedaction.getValue());
        }

        // need to reinsert the entity, due to the boundary having changed.
        removeAndUpdateAndReInsertEntity(entityToBeResized, manualResizeRedaction, newStartOffset);
        entityToBeResized.getManualOverwrite().addChange(manualResizeRedaction);
    }


    /**
     * Detaches the entity from all nodes and pages referencing it, updates its text range and adds it to the graph
     * again, starting at the root node of the document tree.
     */
    private void removeAndUpdateAndReInsertEntity(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction, int newStartOffset) {

        // must be resolved before the containing node is reset below
        SemanticNode nodeToInsertInto = entityToBeResized.getDeepestFullyContainingNode().getDocumentTree().getRoot().getNode();

        // detach the entity itself (not this service) from every node and page that references it;
        // this happens before the text range is mutated, presumably because entity equality depends on it - TODO confirm
        entityToBeResized.getIntersectingNodes().forEach(node -> node.getEntities().remove(entityToBeResized));
        entityToBeResized.getPages().forEach(page -> page.getEntities().remove(entityToBeResized));

        entityToBeResized.setIntersectingNodes(new LinkedList<>());
        entityToBeResized.setDeepestFullyContainingNode(null);
        entityToBeResized.setPages(new HashSet<>());

        entityToBeResized.getTextRange().setStart(newStartOffset);
        entityToBeResized.getTextRange().setEnd(newStartOffset + manualResizeRedaction.getValue().length());

        entityCreationService.addEntityToGraph(entityToBeResized, nodeToInsertInto);
    }


    /**
     * Sets the position of the image to the bounding box of all positions of the manual resize and records the
     * change on the image. Does nothing if the resize carries no positions.
     *
     * @param image                 the image to resize
     * @param manualResizeRedaction the manual resize carrying the new positions
     */
    public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) {

        var positions = manualResizeRedaction.getPositions();
        // null has to be checked first, otherwise isEmpty() throws before the null check is ever reached
        if (positions == null || positions.isEmpty()) {
            return;
        }

        var bBox = RectangleTransformations.rectangle2DBBox(positions.stream().map(ManualChangesApplicationService::toRectangle2D).toList());
        image.setPosition(bBox);
        image.getManualOverwrite().addChange(manualResizeRedaction);
    }


    /**
     * Converts a persistence-service rectangle to a {@link Rectangle2D}.
     * NOTE(review): the origin is computed as top-left minus width/height - confirm this matches the coordinate
     * convention of the incoming rectangles.
     */
    private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rect) {

        return new Rectangle2D.Double(rect.getTopLeftX() - rect.getWidth(), rect.getTopLeftY() - rect.getHeight(), rect.getWidth(), rect.getHeight());
    }

}

View File

@ -1,64 +0,0 @@
package com.iqser.red.service.redaction.v1.server.document.services;
import java.awt.geom.Rectangle2D;
import java.util.NoSuchElementException;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import lombok.RequiredArgsConstructor;
/**
 * Applies manual resize redactions to entities and images of the document graph.
 */
@RequiredArgsConstructor
public class ManualRedactionApplicationService {

    private final EntityCreationService entityCreationService;


    /**
     * Resizes an entity according to the given manual resize and re-inserts it into the graph.
     * <p>
     * The position whose id equals the annotation id of the resize gets the new rectangles. The new start offset is
     * derived by locating the smaller of the two values (old entity value / resized value) inside the larger one.
     * The entity is flagged as resized and its removed/ignored flags are cleared.
     *
     * @param entityToBeResized     the entity whose boundary and position are updated in place
     * @param manualResizeRedaction the manual resize carrying the annotation id, the new value and the new positions
     * @throws NoSuchElementException if the entity has no position with the annotation id of the resize
     */
    public void resizeEntityAndReinsert(RedactionEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) {

        RedactionPosition redactionPositionToBeResized = entityToBeResized.getRedactionPositionsPerPage()
                .stream()
                .filter(redactionPosition -> redactionPosition.getId().equals(manualResizeRedaction.getAnnotationId()))
                .findFirst()
                .orElseThrow(() -> new NoSuchElementException("No redaction position with matching annotation id found!"));

        redactionPositionToBeResized.setRectanglePerLine(manualResizeRedaction.getPositions().stream().map(ManualRedactionApplicationService::toRectangle2D).toList());

        // NOTE(review): indexOf returns -1 when neither value contains the other, which silently shifts the offset by one - confirm callers guarantee containment.
        int newStartOffset;
        if (manualResizeRedaction.getValue().length() > entityToBeResized.getValue().length()) {
            // entity grew: the old value sits somewhere inside the new value, so the start moves to the left
            newStartOffset = entityToBeResized.getBoundary().start() - manualResizeRedaction.getValue().indexOf(entityToBeResized.getValue());
        } else {
            // entity shrank (or kept its length): the new value sits somewhere inside the old value, so the start moves to the right
            newStartOffset = entityToBeResized.getBoundary().start() + entityToBeResized.getValue().indexOf(manualResizeRedaction.getValue());
        }

        // must be resolved before the entity is removed from the graph
        SemanticNode nodeToInsertInto = entityToBeResized.getDeepestFullyContainingNode().getDocumentTree().getRoot().getNode();
        entityToBeResized.removeFromGraph();
        entityToBeResized.setResized(true);
        entityToBeResized.setRemoved(false);
        entityToBeResized.setIgnored(false);
        entityToBeResized.getBoundary().setStart(newStartOffset);
        entityToBeResized.getBoundary().setEnd(newStartOffset + manualResizeRedaction.getValue().length());
        entityCreationService.addEntityToGraph(entityToBeResized, nodeToInsertInto);
    }


    /**
     * Sets the position of the image to the bounding box of all positions of the manual resize.
     * Does nothing if the resize carries no positions.
     *
     * @param image                 the image to resize
     * @param manualResizeRedaction the manual resize carrying the new positions
     */
    public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) {

        var positions = manualResizeRedaction.getPositions();
        // null has to be checked first, otherwise isEmpty() throws before the null check is ever reached
        if (positions == null || positions.isEmpty()) {
            return;
        }

        var bBox = RectangleTransformations.rectangle2DBBox(positions.stream().map(ManualRedactionApplicationService::toRectangle2D).toList());
        image.setPosition(bBox);
    }


    /**
     * Converts a persistence-service rectangle to a {@link Rectangle2D}.
     * NOTE(review): the origin is computed as top-left minus width/height - confirm this matches the coordinate
     * convention of the incoming rectangles.
     */
    private static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rect) {

        return new Rectangle2D.Double(rect.getTopLeftX() - rect.getWidth(), rect.getTopLeftY() - rect.getHeight(), rect.getWidth(), rect.getHeight());
    }

}

View File

@ -12,8 +12,6 @@ import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
@ -22,7 +20,6 @@ import lombok.NoArgsConstructor;
public class RectangleTransformations {
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
@ -44,15 +41,6 @@ public class RectangleTransformations {
}
public static Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) {
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
(float) rectangle2D.getWidth(),
-(float) rectangle2D.getHeight(),
pageNumber);
}
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector());

View File

@ -9,8 +9,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
@ -51,60 +51,60 @@ public class RedactionSearchUtility {
}
public static Boundary findFirstBoundary(String regexPattern, CharSequence searchText) {
public static TextRange findFirstBoundary(String regexPattern, CharSequence searchText) {
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
Matcher matcher = pattern.matcher(searchText);
if (matcher.find()) {
return new Boundary(matcher.start(), matcher.end());
return new TextRange(matcher.start(), matcher.end());
}
throw new IllegalArgumentException(format("Charsequence %s does not contain any matches for pattern %s", searchText, regexPattern));
}
public static int getExpandedEndByRegex(RedactionEntity entity, String regexPattern) {
public static int getExpandedEndByRegex(TextEntity entity, String regexPattern) {
int expandedEnd;
if (anyMatch(entity.getTextAfter(), regexPattern)) {
Boundary postfixBoundary = findFirstBoundary(regexPattern, entity.getTextAfter());
expandedEnd = postfixBoundary.end() + entity.getBoundary().end();
TextRange postfixTextRange = findFirstBoundary(regexPattern, entity.getTextAfter());
expandedEnd = postfixTextRange.end() + entity.getTextRange().end();
} else {
expandedEnd = entity.getBoundary().end();
expandedEnd = entity.getTextRange().end();
}
return expandedEnd;
}
public static int getExpandedStartByRegex(RedactionEntity entity, String regexPattern) {
public static int getExpandedStartByRegex(TextEntity entity, String regexPattern) {
int expandedStart;
if (anyMatch(entity.getTextBefore(), regexPattern)) {
Boundary prefixBoundary = findFirstBoundary(regexPattern, entity.getTextBefore());
expandedStart = prefixBoundary.start() + entity.getBoundary().start() - entity.getTextBefore().length();
TextRange prefixTextRange = findFirstBoundary(regexPattern, entity.getTextBefore());
expandedStart = prefixTextRange.start() + entity.getTextRange().start() - entity.getTextBefore().length();
} else {
expandedStart = entity.getBoundary().start();
expandedStart = entity.getTextRange().start();
}
return expandedStart;
}
public static Boundary findBoundaryOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
public static TextRange findBoundaryOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
List<Boundary> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineBoundary).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList();
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed().map(textBlock::getLineTextRange).filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)).toList();
if (lineBoundaries.isEmpty()) {
return new Boundary(textBlock.getBoundary().start(), textBlock.getBoundary().start());
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
}
return Boundary.merge(lineBoundaries);
return TextRange.merge(lineBoundaries);
}
private static boolean isWithinYRange(double maxY, double minY, TextBlock textBlock, Boundary lineBoundary) {
private static boolean isWithinYRange(double maxY, double minY, TextBlock textBlock, TextRange lineTextRange) {
Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineBoundary));
Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange));
return lineBBox.getMinY() < maxY && minY < lineBBox.getMaxY();
}
public static List<Boundary> findBoundariesByRegex(String regexPattern, TextBlock textBlock) {
public static List<TextRange> findBoundariesByRegex(String regexPattern, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
return getBoundariesByPattern(textBlock, 0, pattern);
@ -112,68 +112,68 @@ public class RedactionSearchUtility {
}
public static List<Boundary> findBoundariesByRegex(String regexPattern, int group, TextBlock textBlock) {
public static List<TextRange> findBoundariesByRegex(String regexPattern, int group, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
return getBoundariesByPattern(textBlock, group, pattern);
}
public static List<Boundary> findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) {
public static List<TextRange> findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false);
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
}
public static List<Boundary> findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
public static List<TextRange> findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true);
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
}
public static List<Boundary> findBoundariesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
public static List<TextRange> findBoundariesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledPattern(regexPattern, true);
return getBoundariesByPattern(textBlock, group, pattern);
}
private static List<Boundary> getBoundariesByPattern(TextBlock textBlock, int group, Pattern pattern) {
private static List<TextRange> getBoundariesByPattern(TextBlock textBlock, int group, Pattern pattern) {
Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getBoundary()));
List<Boundary> boundaries = new LinkedList<>();
Matcher matcher = pattern.matcher(textBlock.subSequence(textBlock.getTextRange()));
List<TextRange> boundaries = new LinkedList<>();
while (matcher.find()) {
boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start()));
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
}
return boundaries;
}
private static List<Boundary> getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) {
private static List<TextRange> getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) {
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
List<Boundary> boundaries = new LinkedList<>();
List<TextRange> boundaries = new LinkedList<>();
while (matcher.find()) {
boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start()));
boundaries.add(new TextRange(matcher.start(group) + textBlock.getTextRange().start(), matcher.end(group) + textBlock.getTextRange().start()));
}
return boundaries;
}
public static List<Boundary> findBoundariesByString(String searchString, TextBlock textBlock) {
public static List<TextRange> findBoundariesByString(String searchString, TextBlock textBlock) {
List<Boundary> boundaries = new LinkedList<>();
List<TextRange> boundaries = new LinkedList<>();
for (int index = textBlock.indexOf(searchString); index >= 0; index = textBlock.indexOf(searchString, index + 1)) {
boundaries.add(new Boundary(index, index + searchString.length()));
boundaries.add(new TextRange(index, index + searchString.length()));
}
return boundaries;
}
public static List<Boundary> findBoundariesByStringIgnoreCase(String searchString, TextBlock textBlock) {
public static List<TextRange> findBoundariesByStringIgnoreCase(String searchString, TextBlock textBlock) {
Pattern pattern = Pattern.compile(Pattern.quote(searchString), Pattern.CASE_INSENSITIVE);
return getBoundariesByPattern(textBlock, 0, pattern);

View File

@ -20,15 +20,15 @@ import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
@ -49,85 +49,85 @@ public class CustomEntityCreationAdapter {
}
public List<EntityIdentifier> toRedactionEntity(RedactionLog redactionLog, SemanticNode node) {
public List<ManualEntity> toRedactionEntity(RedactionLog redactionLog, SemanticNode node) {
List<EntityIdentifier> entityIdentifiers = redactionLog.getRedactionLogEntry().stream().map(EntityIdentifier::fromRedactionLogEntry).toList();
return toRedactionEntity(entityIdentifiers, node);
List<ManualEntity> manualEntities = redactionLog.getRedactionLogEntry().stream().map(ManualEntity::fromRedactionLogEntry).toList();
return toRedactionEntity(manualEntities, node);
}
public List<EntityIdentifier> createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set<ManualRedactionEntry> manualRedactionEntries, SemanticNode node) {
public List<ManualEntity> createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set<ManualRedactionEntry> manualRedactionEntries, SemanticNode node) {
List<EntityIdentifier> entityIdentifiers = manualRedactionEntries.stream()
List<ManualEntity> manualEntities = manualRedactionEntries.stream()
.filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary()))
.map(EntityIdentifier::fromManualRedactionEntry)
.map(ManualEntity::fromManualRedactionEntry)
.toList();
return toRedactionEntity(entityIdentifiers, node);
return toRedactionEntity(manualEntities, node);
}
/**
 * Tries to recreate each given entity description inside the node.
 * First, temporary entities are created for every occurrence of the (lower-cased) values and
 * grouped by value. Then, for each description, the closest temporary entity is looked up; if one
 * is found, the real entity is created on its text range, otherwise the description is collected
 * as "not found". Finally all temporary entities are removed from the graph again.
 *
 * @param manualEntities the entity descriptions to place in the node
 * @param node the SemanticNode in which the entities are searched
 * @return the descriptions for which no matching temporary entity was found
 */
private List<EntityIdentifier> toRedactionEntity(List<EntityIdentifier> entityIdentifiers, SemanticNode node) {
private List<ManualEntity> toRedactionEntity(List<ManualEntity> manualEntities, SemanticNode node) {
Set<Integer> pageNumbers = entityIdentifiers.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
Set<String> entryValues = entityIdentifiers.stream().map(EntityIdentifier::getValue).map(String::toLowerCase).collect(Collectors.toSet());
Set<Integer> pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
// NOTE(review): String::toLowerCase uses the default locale here, while the map lookup and the
// grouping elsewhere in this class use toLowerCase(Locale.ROOT) - the keys may differ under e.g. a Turkish default locale.
Set<String> entryValues = manualEntities.stream().map(ManualEntity::getValue).map(String::toLowerCase).collect(Collectors.toSet());
Map<String, List<RedactionEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues);
Map<String, List<TextEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues);
List<EntityIdentifier> notFoundEntityIdentifiers = new LinkedList<>();
for (EntityIdentifier entityIdentifier : entityIdentifiers) {
Optional<RedactionEntity> optionalRedactionEntity = findClosestEntityAndReturnEmptyIfNotFound(entityIdentifier, tempEntitiesByValue);
List<ManualEntity> notFoundManualEntities = new LinkedList<>();
for (ManualEntity manualEntity : manualEntities) {
Optional<TextEntity> optionalRedactionEntity = findClosestEntityAndReturnEmptyIfNotFound(manualEntity, tempEntitiesByValue);
if (optionalRedactionEntity.isEmpty()) {
notFoundEntityIdentifiers.add(entityIdentifier);
notFoundManualEntities.add(manualEntity);
continue;
}
createCorrectEntity(entityIdentifier, node, optionalRedactionEntity.get().getBoundary());
createCorrectEntity(manualEntity, node, optionalRedactionEntity.get().getTextRange());
}
// the temporary entities only served as search candidates and must not stay in the graph
tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph);
return notFoundEntityIdentifiers;
tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
return notFoundManualEntities;
}
/**
 * Creates the entity with the correct values on the given text range, based on the given parameters.
 * The entity is forced (applied) or skipped depending on the applied flag, receives the dictionary
 * flags and the manual change log of the description, and its positions are rewritten so that they
 * carry the id of the description. This method does not remove any temporary entity itself.
 *
 * @param entityIdentifier The entity identifier for the RedactionEntity.
 * @param manualEntity The description (id, type, rule, reason, flags) the created TextEntity is based on.
 * @param node The SemanticNode in which the entity is created.
 * @param closestBoundary The closest Boundary to the RedactionEntity.
 * @param closestTextRange The TextRange of the closest matching occurrence; the entity is created on this range.
 */
private void createCorrectEntity(EntityIdentifier entityIdentifier, SemanticNode node, Boundary closestBoundary) {
private void createCorrectEntity(ManualEntity manualEntity, SemanticNode node, TextRange closestTextRange) {
RedactionEntity correctEntity = entityCreationService.forceByBoundary(closestBoundary, entityIdentifier.getType(), entityIdentifier.getEntityType(), node);
TextEntity correctEntity = entityCreationService.forceByBoundary(closestTextRange, manualEntity.getType(), manualEntity.getEntityType(), node);
if (entityIdentifier.isApplied()) {
correctEntity.force(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason(), entityIdentifier.getLegalBasis());
// applied descriptions are forced with rule, reason and legal basis; all others are skipped with rule and reason only
if (manualEntity.isApplied()) {
correctEntity.force(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis());
} else {
correctEntity.skip(entityIdentifier.getRuleIdentifier(), entityIdentifier.getReason());
correctEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason());
}
correctEntity.setDictionaryEntry(entityIdentifier.isDictionaryEntry());
correctEntity.setDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry());
correctEntity.setDictionaryEntry(manualEntity.isDictionaryEntry());
correctEntity.setDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry());
correctEntity.getManualOverwrite().addChanges(manualEntity.getManualOverwrite().getManualChangeLog());
// TODO: refactor this away! This is only needed so the persistence service can apply the correct comment and ManualChanges.
// It would be better, if the redaction-service returns a map of annotationId changes and the persistence service then migrates the annotationIds of Comments and ManualRedactions
List<RedactionPosition> redactionPositionsWithIdOfManualRedaction = new ArrayList<>(correctEntity.getRedactionPositionsPerPage().size());
for (RedactionPosition redactionPosition : correctEntity.getRedactionPositionsPerPage()) {
redactionPositionsWithIdOfManualRedaction.add(new RedactionPosition(entityIdentifier.getId(), redactionPosition.getPage(), redactionPosition.getRectanglePerLine()));
// AnnotationIds must match the IDs in the add requests, or comments break. Maybe think about migrating IDs on the fly!
// Each position is copied with the same page and rectangles, but with the id of the description.
List<PositionOnPage> redactionPositionsWithIdOfManualOnPage = new ArrayList<>(correctEntity.getPositionsOnPagePerPage().size());
for (PositionOnPage positionOnPage : correctEntity.getPositionsOnPagePerPage()) {
redactionPositionsWithIdOfManualOnPage.add(new PositionOnPage(manualEntity.getId(), positionOnPage.getPage(), positionOnPage.getRectanglePerLine()));
}
correctEntity.setRedactionPositionsPerPage(redactionPositionsWithIdOfManualRedaction);
correctEntity.setPositionsOnPagePerPage(redactionPositionsWithIdOfManualOnPage);
}
private Optional<RedactionEntity> findClosestEntityAndReturnEmptyIfNotFound(EntityIdentifier identifier, Map<String, List<RedactionEntity>> entitiesWithSameValue) {
private Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(ManualEntity identifier, Map<String, List<TextEntity>> entitiesWithSameValue) {
List<RedactionEntity> possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT));
List<TextEntity> possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT));
if (entityIdentifierValueNotFound(possibleEntities)) {
log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue());
return Optional.empty();
}
Optional<RedactionEntity> optionalClosestEntity = possibleEntities.stream()
Optional<TextEntity> optionalClosestEntity = possibleEntities.stream()
.filter(entity -> pagesMatch(entity, identifier.getEntityPosition()))
.min(Comparator.comparingDouble(entity -> calculateMinDistance(identifier.getEntityPosition(), entity)));
@ -136,14 +136,14 @@ public class CustomEntityCreationAdapter {
return Optional.empty();
}
RedactionEntity closestEntity = optionalClosestEntity.get();
TextEntity closestEntity = optionalClosestEntity.get();
double distance = calculateMinDistance(identifier.getEntityPosition(), closestEntity);
if (distance > MATCH_THRESHOLD) {
log.warn(format("Distance to closest found entity is %.2f and therefore higher than the threshold of %.2f for \n%s \n%s",
distance,
MATCH_THRESHOLD,
identifier.getEntityPosition(),
closestEntity.getRedactionPositionsPerPage()));
closestEntity.getPositionsOnPagePerPage()));
return Optional.empty();
}
@ -151,13 +151,13 @@ public class CustomEntityCreationAdapter {
}
/**
 * Tells whether no candidate entities exist for a value.
 *
 * @param possibleEntities the candidates looked up for a value, may be null
 * @return true if the list is null or empty
 */
private static boolean entityIdentifierValueNotFound(List<RedactionEntity> possibleEntities) {
private static boolean entityIdentifierValueNotFound(List<TextEntity> possibleEntities) {
return possibleEntities == null || possibleEntities.isEmpty();
}
private Map<String, List<RedactionEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, Set<Integer> pageNumbers, Set<String> entryValues) {
private Map<String, List<TextEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, Set<Integer> pageNumbers, Set<String> entryValues) {
if (!pageNumbers.stream().allMatch(node::onPage)) {
throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s",
@ -167,28 +167,28 @@ public class CustomEntityCreationAdapter {
}
SearchImplementation searchImplementation = new SearchImplementation(entryValues, true);
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
.stream()
.map(boundary -> entityCreationService.forceByBoundary(boundary, "temp", EntityType.ENTITY, node))
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
}
/**
 * Checks whether the found values are exactly the searched values.
 *
 * @param entitiesByValue the found entities, keyed by value
 * @param entryValues the values that were searched for
 * @return true if the key set of the map equals the given set of values
 */
private static boolean allValuesFound(Map<String, List<RedactionEntity>> entitiesByValue, Set<String> entryValues) {
private static boolean allValuesFound(Map<String, List<TextEntity>> entitiesByValue, Set<String> entryValues) {
return entitiesByValue.keySet().equals(entryValues);
}
/**
 * Checks whether the entity is located on every page the original positions refer to.
 *
 * @param entity the candidate entity
 * @param originalPositions the positions of the original entry
 * @return true if the page numbers of the entity contain all page numbers of the original positions
 */
private static boolean pagesMatch(RedactionEntity entity, List<RectangleWithPage> originalPositions) {
private static boolean pagesMatch(TextEntity entity, List<RectangleWithPage> originalPositions) {
Set<Integer> entityPageNumbers = entity.getRedactionPositionsPerPage().stream().map(RedactionPosition::getPage).map(Page::getNumber).collect(Collectors.toSet());
Set<Integer> entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet());
Set<Integer> originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet());
// the entity may cover additional pages; only the original pages are required
return entityPageNumbers.containsAll(originalPageNumbers);
}
private double calculateMinDistance(List<RectangleWithPage> originalPositions, RedactionEntity entity) {
private double calculateMinDistance(List<RectangleWithPage> originalPositions, TextEntity entity) {
if (originalPositions.size() != countRectangles(entity)) {
return Double.MAX_VALUE;
@ -199,18 +199,18 @@ public class CustomEntityCreationAdapter {
}
/**
 * Counts the rectangles of an entity.
 *
 * @param entity the entity whose rectangles are counted
 * @return the sum of the sizes of the rectangle-per-line collections over all positions of the entity
 */
private static long countRectangles(RedactionEntity entity) {
private static long countRectangles(TextEntity entity) {
return entity.getRedactionPositionsPerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
}
private double calculateMinDistancePerRectangle(RedactionEntity entity, int pageNumber, Rectangle2D originalRectangle) {
private double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) {
return entity.getRedactionPositionsPerPage()
return entity.getPositionsOnPagePerPage()
.stream()
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == pageNumber)
.map(RedactionPosition::getRectanglePerLine)
.map(PositionOnPage::getRectanglePerLine)
.flatMap(Collection::stream)
.mapToDouble(rectangle -> calculateDistance(rectangle, originalRectangle))
.min()

View File

@ -4,7 +4,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -37,7 +37,7 @@ public class NerEntities {
}
/**
 * A single NER result, consisting of its value, the range it covers in the text and its type.
 */
public record NerEntity(String value, Boundary boundary, String type) {
public record NerEntity(String value, TextRange textRange, String type) {
}

View File

@ -9,7 +9,7 @@ import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
@ -44,7 +44,7 @@ public class NerEntitiesAdapter {
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSections(document),
nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(),
new Boundary(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
nerEntityModel.getType())).toList());
}
@ -63,17 +63,17 @@ public class NerEntitiesAdapter {
* @param allowDuplicates allow combining multiple parts of same type
* @return A Stream of the combined boundaries
*/
public Stream<Boundary> combineNerEntities(NerEntities nerEntities,
Set<String> essentialTypes,
Set<String> typesToCombine,
int maxDistanceBetweenParts,
int minPartsToCombine,
boolean allowDuplicates) {
public Stream<TextRange> combineNerEntities(NerEntities nerEntities,
Set<String> essentialTypes,
Set<String> typesToCombine,
int maxDistanceBetweenParts,
int minPartsToCombine,
boolean allowDuplicates) {
List<NerEntities.NerEntity> sortedEntities = nerEntities.getNerEntityList()
.stream()
.filter(entity -> typesToCombine.contains(entity.type()))
.sorted(Comparator.comparingInt(entity -> entity.boundary().start()))
.sorted(Comparator.comparingInt(entity -> entity.textRange().start()))
.toList();
if (sortedEntities.isEmpty()) {
@ -86,20 +86,20 @@ public class NerEntitiesAdapter {
for (NerEntities.NerEntity startEntity : startEntitiesOfEssentialType) {
List<NerEntities.NerEntity> currentCluster = new LinkedList<>();
entityClusters.add(currentCluster);
int lastEndOffset = startEntity.boundary().end();
int lastEndOffset = startEntity.textRange().end();
for (NerEntities.NerEntity entity : sortedEntities) {
if (entity.boundary().start() < lastEndOffset) {
if (entity.textRange().start() < lastEndOffset) {
continue;
}
if (distanceIsLargerThanMaxDistance(lastEndOffset, entity, maxDistanceBetweenParts) || isDuplicate(currentCluster, entity, allowDuplicates)) {
currentCluster = new LinkedList<>();
entityClusters.add(currentCluster);
currentCluster.add(entity);
lastEndOffset = entity.boundary().end();
lastEndOffset = entity.textRange().end();
} else {
currentCluster.add(entity);
lastEndOffset = entity.boundary().end();
lastEndOffset = entity.textRange().end();
}
}
}
@ -120,7 +120,7 @@ public class NerEntitiesAdapter {
*
* @return A Stream of the combined entities of type "CBI_address"
*/
public Stream<Boundary> combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) {
public Stream<TextRange> combineNerEntitiesToCbiAddressDefaults(NerEntities entityRecognitionEntities) {
return combineNerEntities(entityRecognitionEntities,
CBI_ADDRESS_ESSENTIAL_TYPES,
@ -139,13 +139,13 @@ public class NerEntitiesAdapter {
/**
 * Checks whether the gap between the previous end offset and the start of the entity is too large.
 *
 * @param lastEndOffset the end offset of the previous entity
 * @param entity the entity whose start offset is compared
 * @param maxDistance the largest allowed gap
 * @return true if (start of entity - lastEndOffset) is strictly greater than maxDistance
 */
private static boolean distanceIsLargerThanMaxDistance(int lastEndOffset, NerEntities.NerEntity entity, int maxDistance) {
return (entity.boundary().start() - lastEndOffset) > maxDistance;
return (entity.textRange().start() - lastEndOffset) > maxDistance;
}
/**
 * Merges the ranges of all given entities into one range.
 * NOTE(review): presumably the merged range spans from the smallest start to the largest end - confirm against merge().
 *
 * @param nerEntities the entities whose ranges are merged
 * @return the range returned by merge() for the ranges of all given entities
 */
private static Boundary toContainingBoundary(List<NerEntities.NerEntity> nerEntities) {
private static TextRange toContainingBoundary(List<NerEntities.NerEntity> nerEntities) {
return Boundary.merge(nerEntities.stream().map(NerEntities.NerEntity::boundary).toList());
return TextRange.merge(nerEntities.stream().map(NerEntities.NerEntity::textRange).toList());
}
@ -162,7 +162,7 @@ public class NerEntitiesAdapter {
/**
 * Collects the start offset of the text block of every main section of the document.
 *
 * @param document the document whose main sections are inspected
 * @return the start offsets, in the order in which getMainSections() returns the sections
 */
private static List<Integer> getStringStartOffsetsForMainSections(Document document) {
return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getBoundary).map(Boundary::start).toList();
return document.getMainSections().stream().map(Section::getTextBlock).map(TextBlock::getTextRange).map(TextRange::start).toList();
}
}

View File

@ -1,73 +0,0 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
/**
 * Immutable description of an entity, built either from a redaction log entry or from a manual
 * redaction entry. All fields are private and final (see FieldDefaults); accessors and the
 * all-args constructor are generated by Lombok.
 */
@Getter
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class EntityIdentifier {

    // must be used for comments to work correctly
    String id;
    String value;
    List<RectangleWithPage> entityPosition;
    String ruleIdentifier;
    String reason;
    String legalBasis;
    String type;
    String section;
    EntityType entityType;
    boolean applied;
    boolean isDictionaryEntry;
    boolean isDossierDictionaryEntry;
    boolean rectangle;


    /**
     * Builds an identifier from an entry of the redaction log.
     * The rule identifier is assembled as "type.matchedRule.0"; recommendations are mapped to
     * EntityType.RECOMMENDATION, everything else to EntityType.ENTITY.
     *
     * @param redactionLogEntry the redaction log entry to convert
     * @return the identifier describing the entry
     */
    public static EntityIdentifier fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) {

        String assembledRuleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";
        List<RectangleWithPage> positions = redactionLogEntry.getPositions()
                .stream()
                .map(RectangleWithPage::fromRedactionLogRectangle)
                .toList();

        String entryId = redactionLogEntry.getId();
        String entryValue = redactionLogEntry.getValue();
        String entryReason = redactionLogEntry.getReason();
        String entryLegalBasis = redactionLogEntry.getLegalBasis();
        String entryType = redactionLogEntry.getType();
        String entrySection = redactionLogEntry.getSection();

        EntityType mappedEntityType;
        if (redactionLogEntry.isRecommendation()) {
            mappedEntityType = EntityType.RECOMMENDATION;
        } else {
            mappedEntityType = EntityType.ENTITY;
        }

        boolean redacted = redactionLogEntry.isRedacted();
        boolean dictionaryEntry = redactionLogEntry.isDictionaryEntry();
        boolean dossierDictionaryEntry = redactionLogEntry.isDossierDictionaryEntry();
        boolean isRectangle = redactionLogEntry.isRectangle();

        return new EntityIdentifier(entryId,
                entryValue,
                positions,
                assembledRuleIdentifier,
                entryReason,
                entryLegalBasis,
                entryType,
                entrySection,
                mappedEntityType,
                redacted,
                dictionaryEntry,
                dossierDictionaryEntry,
                isRectangle);
    }


    /**
     * Builds an identifier from a manual redaction entry.
     * Manual entries always use the rule identifier "MAN.0.0", are of type EntityType.ENTITY,
     * are marked as applied and are never flagged as (dossier) dictionary entries.
     *
     * @param manualRedactionEntry the manual redaction entry to convert
     * @return the identifier describing the entry
     */
    public static EntityIdentifier fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) {

        List<RectangleWithPage> positions = manualRedactionEntry.getPositions()
                .stream()
                .map(RectangleWithPage::fromAnnotationRectangle)
                .toList();

        String annotationId = manualRedactionEntry.getAnnotationId();
        String entryValue = manualRedactionEntry.getValue();
        String entryReason = manualRedactionEntry.getReason();
        String entryLegalBasis = manualRedactionEntry.getLegalBasis();
        String entryType = manualRedactionEntry.getType();
        String entrySection = manualRedactionEntry.getSection();
        boolean isRectangle = manualRedactionEntry.isRectangle();

        return new EntityIdentifier(annotationId,
                entryValue,
                positions,
                "MAN.0.0",
                entryReason,
                entryLegalBasis,
                entryType,
                entrySection,
                EntityType.ENTITY,
                true,
                false,
                false,
                isRectangle);
    }

}

View File

@ -0,0 +1,91 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.List;
import java.util.PriorityQueue;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.Entity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
/**
 * Immutable description of an entity, built either from a redaction log entry or from a manual
 * redaction entry. Instances are created through the Lombok builder; the matched rule queue and
 * the manual change overwrite default to fresh, empty instances when not set explicitly.
 */
@Getter
@Builder
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ManualEntity implements Entity {

    // must be mapped into a TextEntity as is for comments to work correctly
    String id;
    String value;
    List<RectangleWithPage> entityPosition;
    String ruleIdentifier;
    String reason;
    String legalBasis;
    String type;
    String section;
    EntityType entityType;
    boolean applied;
    boolean isDictionaryEntry;
    boolean isDossierDictionaryEntry;
    boolean rectangle;

    @Builder.Default
    PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
    @Builder.Default
    ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();


    /**
     * Builds a ManualEntity from an entry of the redaction log.
     * The rule identifier is assembled as "type.matchedRule.0"; recommendations are mapped to
     * EntityType.RECOMMENDATION, everything else to EntityType.ENTITY.
     *
     * @param redactionLogEntry the redaction log entry to convert
     * @return the ManualEntity describing the entry
     */
    public static ManualEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) {

        String assembledRuleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";
        List<RectangleWithPage> positions = redactionLogEntry.getPositions()
                .stream()
                .map(RectangleWithPage::fromRedactionLogRectangle)
                .toList();

        EntityType mappedEntityType;
        if (redactionLogEntry.isRecommendation()) {
            mappedEntityType = EntityType.RECOMMENDATION;
        } else {
            mappedEntityType = EntityType.ENTITY;
        }

        return ManualEntity.builder()
                // identity and location
                .id(redactionLogEntry.getId())
                .value(redactionLogEntry.getValue())
                .entityPosition(positions)
                // rule information
                .ruleIdentifier(assembledRuleIdentifier)
                .reason(redactionLogEntry.getReason())
                .legalBasis(redactionLogEntry.getLegalBasis())
                // classification
                .type(redactionLogEntry.getType())
                .section(redactionLogEntry.getSection())
                .entityType(mappedEntityType)
                // flags
                .applied(redactionLogEntry.isRedacted())
                .isDictionaryEntry(redactionLogEntry.isDictionaryEntry())
                .isDossierDictionaryEntry(redactionLogEntry.isDossierDictionaryEntry())
                .rectangle(redactionLogEntry.isRectangle())
                .build();
    }


    /**
     * Builds a ManualEntity from a manual redaction entry.
     * Manual entries always use the rule identifier "MAN.0.0", are of type EntityType.ENTITY,
     * are marked as applied and are never flagged as (dossier) dictionary entries. The entry
     * itself is recorded as a change in the manual overwrite of the result.
     *
     * @param manualRedactionEntry the manual redaction entry to convert
     * @return the ManualEntity describing the entry
     */
    public static ManualEntity fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry) {

        List<RectangleWithPage> positions = manualRedactionEntry.getPositions()
                .stream()
                .map(RectangleWithPage::fromAnnotationRectangle)
                .toList();

        ManualChangeOverwrite overwriteWithEntry = new ManualChangeOverwrite();
        overwriteWithEntry.addChange(manualRedactionEntry);

        return ManualEntity.builder()
                // identity and location
                .id(manualRedactionEntry.getAnnotationId())
                .value(manualRedactionEntry.getValue())
                .entityPosition(positions)
                // rule information
                .ruleIdentifier("MAN.0.0")
                .reason(manualRedactionEntry.getReason())
                .legalBasis(manualRedactionEntry.getLegalBasis())
                // classification
                .type(manualRedactionEntry.getType())
                .section(manualRedactionEntry.getSection())
                .entityType(EntityType.ENTITY)
                // flags
                .applied(true)
                .isDictionaryEntry(false)
                .isDossierDictionaryEntry(false)
                .rectangle(manualRedactionEntry.isRectangle())
                .manualOverwrite(overwriteWithEntry)
                .build();
    }

}

View File

@ -17,7 +17,7 @@ import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
import lombok.Data;
@ -116,28 +116,28 @@ public class Dictionary {
}
/**
 * Adds the value of the given entity as a local dictionary entry, using the type and the matched
 * rules of the entity. The last argument passed to addLocalDictionaryEntry is false.
 * NOTE(review): presumably that flag controls whether the last name is added separately - confirm against addLocalDictionaryEntry.
 *
 * @param textEntity the entity whose value is added
 */
public void recommendEverywhere(RedactionEntity redactionEntity) {
public void recommendEverywhere(TextEntity textEntity) {
addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), false);
addLocalDictionaryEntry(textEntity.getType(), textEntity.getValue(), textEntity.getMatchedRuleList(), false);
}
/**
 * Adds the value of the given entity as a local dictionary entry, using the type and the matched
 * rules of the entity. Unlike recommendEverywhere, the last argument passed to
 * addLocalDictionaryEntry is true.
 *
 * @param textEntity the entity whose value is added
 */
public void recommendEverywhereWithLastNameSeparately(RedactionEntity redactionEntity) {
public void recommendEverywhereWithLastNameSeparately(TextEntity textEntity) {
addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), true);
addLocalDictionaryEntry(textEntity.getType(), textEntity.getValue(), textEntity.getMatchedRuleList(), true);
}
/**
 * Splits the value of the given entity with Patterns.AUTHOR_TABLE_SPLITTER and adds every match
 * of at least three characters as a local dictionary entry, using the type and the matched rules
 * of the entity.
 *
 * @param textEntity the entity whose value is split
 */
public void addMultipleAuthorsAsRecommendation(RedactionEntity redactionEntity) {
public void addMultipleAuthorsAsRecommendation(TextEntity textEntity) {
String cleanedWord = redactionEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
// commas become spaces, the result is trimmed and a single trailing space is appended before matching
String cleanedWord = textEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
Pattern pattern = Patterns.AUTHOR_TABLE_SPLITTER;
Matcher matcher = pattern.matcher(cleanedWord);
while (matcher.find()) {
String match = matcher.group().trim();
// matches shorter than three characters are ignored
if (match.length() >= 3) {
addLocalDictionaryEntry(redactionEntity.getType(), match, redactionEntity.getMatchedRuleList(), true);
addLocalDictionaryEntry(textEntity.getType(), match, textEntity.getMatchedRuleList(), true);
}
}
}

View File

@ -9,7 +9,7 @@ import java.util.stream.Collectors;
import org.ahocorasick.trie.Trie;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import lombok.Data;
@ -83,29 +83,29 @@ public class SearchImplementation {
}
/**
 * Finds all occurrences of the search values in the given text.
 * If a pattern is set it is used for matching, otherwise the trie is used.
 *
 * @param text the text to search in
 * @return the ranges of all matches; a new empty list if there are no search values
 */
public List<Boundary> getBoundaries(CharSequence text) {
public List<TextRange> getBoundaries(CharSequence text) {
if (this.values.isEmpty()) {
return new ArrayList<>();
}
if (this.pattern != null) {
return this.pattern.matcher(text).results().map(r -> new Boundary(r.start(), r.end())).collect(Collectors.toList());
return this.pattern.matcher(text).results().map(r -> new TextRange(r.start(), r.end())).collect(Collectors.toList());
} else {
// NOTE(review): the trie end index is shifted by one, presumably because it is inclusive while the range end is exclusive - confirm
return this.trie.parseText(text).stream().map(r -> new Boundary(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
return this.trie.parseText(text).stream().map(r -> new TextRange(r.getStart(), r.getEnd() + 1)).collect(Collectors.toList());
}
}
/**
 * Finds all occurrences of the search values inside a region of the given text.
 * Only the sub-sequence from region.start() to region.end() is searched; the start of the region
 * is added to every match so the returned ranges refer to the full text again.
 *
 * @param text the full text
 * @param region the part of the text to search in
 * @return the ranges of all matches relative to the full text; a new empty list if there are no search values
 */
public List<Boundary> getBoundaries(CharSequence text, Boundary region) {
public List<TextRange> getBoundaries(CharSequence text, TextRange region) {
if (this.values.isEmpty()) {
return new ArrayList<>();
}
CharSequence subSequence = text.subSequence(region.start(), region.end());
if (this.pattern != null) {
return this.pattern.matcher(subSequence).results().map(r -> new Boundary(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList());
return this.pattern.matcher(subSequence).results().map(r -> new TextRange(r.start() + region.start(), r.end() + region.start())).collect(Collectors.toList());
} else {
// NOTE(review): the trie end index is shifted by one, presumably because it is inclusive while the range end is exclusive - confirm
return this.trie.parseText(subSequence).stream().map(r -> new Boundary(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList());
return this.trie.parseText(subSequence).stream().map(r -> new TextRange(r.getStart() + region.start(), r.getEnd() + region.start() + 1)).collect(Collectors.toList());
}
}

View File

@ -16,6 +16,7 @@ import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.legalbasis.LegalBasis;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
@ -29,7 +30,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion;
@ -61,6 +62,7 @@ public class AnalyzeService {
RedactionServiceSettings redactionServiceSettings;
ImportedRedactionService importedRedactionService;
SectionFinderService sectionFinderService;
ManualRedactionEntryService manualRedactionEntryService;
FunctionTimerValues redactmanagerAnalyzePagewiseValues;
@ -82,7 +84,7 @@ public class AnalyzeService {
var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId());
log.info("Updated Rules to Version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
List<EntityIdentifier> notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
List<ManualEntity> notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
entityRedactionService.addDictionaryEntities(dictionary, document);
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
@ -90,7 +92,10 @@ public class AnalyzeService {
Set<FileAttribute> addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, wrapper.container(), analyzeRequest, nerEntities);
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries);
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document,
analyzeRequest.getDossierTemplateId(),
notFoundManualRedactionEntries,
getComments(analyzeRequest));
List<LegalBasis> legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
RedactionLog redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(),
@ -109,24 +114,19 @@ public class AnalyzeService {
true);
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest,
startTime,
redactionLog,
document.getNumberOfPages(),
dictionary.getVersion(),
false,
addedFileAttributes);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, document.getNumberOfPages(), dictionary.getVersion(), false, addedFileAttributes);
}
// This region holds two methods:
// - addManualRedactionEntriesAndReturnNotFoundEntries passed the entries to add of the manual redactions to
//   entityRedactionService.addManualAddRedactionEntities and returned its result, or an empty list when the
//   request carried no manual redactions.
// - getComments returns the comments of the manual redactions of the request, or an empty map when either the
//   manual redactions or their comments are null.
private List<EntityIdentifier> addManualRedactionEntriesAndReturnNotFoundEntries(AnalyzeRequest analyzeRequest, Document document) {
private static Map<String, List<Comment>> getComments(AnalyzeRequest analyzeRequest) {
List<EntityIdentifier> notFoundManualRedactionEntries = Collections.emptyList();
if (analyzeRequest.getManualRedactions() != null) {
notFoundManualRedactionEntries = entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document);
log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
// no manual redactions at all: nothing to return
if (analyzeRequest.getManualRedactions() == null) {
return Collections.emptyMap();
}
return notFoundManualRedactionEntries;
// manual redactions without a comments map are treated like "no comments"
if (analyzeRequest.getManualRedactions().getComments() == null) {
return Collections.emptyMap();
}
return analyzeRequest.getManualRedactions().getComments();
}
@ -170,7 +170,7 @@ public class AnalyzeService {
var wrapper = droolsExecutionService.getLatestKieContainer(analyzeRequest.getDossierTemplateId());
log.info("Updated Rules to version {} for file {} in dossier {}", wrapper.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
List<EntityIdentifier> notFoundManualRedactionEntries = addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
List<ManualEntity> notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, document);
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
@ -186,7 +186,10 @@ public class AnalyzeService {
nerEntities);
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
List<RedactionLogEntry> newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries);
List<RedactionLogEntry> newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document,
analyzeRequest.getDossierTemplateId(),
notFoundManualRedactionEntries,
getComments(analyzeRequest));
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(),
analyzeRequest.getDossierId(),

View File

@ -36,7 +36,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.KieWrapper;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
@ -84,11 +84,11 @@ public class DroolsExecutionService {
KieSession kieSession = kieContainer.newKieSession();
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
ManualRedactionApplicationService manualRedactionApplicationService = new ManualRedactionApplicationService(entityCreationService);
ManualChangesApplicationService manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService);
kieSession.setGlobal("document", document);
kieSession.setGlobal("entityCreationService", entityCreationService);
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
kieSession.setGlobal("dictionary", dictionary);
kieSession.insert(document);

View File

@ -9,16 +9,14 @@ import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
@ -74,12 +72,6 @@ public class EntityRedactionService {
return allFileAttributes.stream().filter(fileAttribute -> !analyzeRequest.getFileAttributes().contains(fileAttribute)).collect(Collectors.toUnmodifiableSet());
}
public List<EntityIdentifier> addManualAddRedactionEntities(Set<ManualRedactionEntry> manualRedactionEntries, Document document) {
// Entities are automatically added to the DocumentGraph and don't need to be inserted again.
return customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(manualRedactionEntries, document);
}
public void addDictionaryEntities(Dictionary dictionary, SemanticNode node) {
@ -98,7 +90,7 @@ public class EntityRedactionService {
boolean isDossierDictionaryEntry) {
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
.stream()
.filter(boundary -> entityCreationService.isValidEntityBoundary(node.getTextBlock(), boundary))
.map(bounds -> entityCreationService.forceByBoundary(bounds, type, entityType, node))

View File

@ -0,0 +1,43 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType;
/**
 * Converts manual annotations into the {@link ManualChange} entries that are attached to redaction log entries.
 * <p>
 * Each conversion starts from {@link ManualChange#from(BaseAnnotation)} and then sets a {@link ManualRedactionType}
 * (plus, for some annotation kinds, a changed property) depending on the concrete annotation class.
 */
@Service
public class ManualChangeFactory {

    /**
     * Converts every annotation of the given list, keeping the list order.
     *
     * @param manualChanges the manual annotations to convert
     * @param isHint        whether the annotated entry is a hint; only affects how force redactions are typed
     * @return the converted changes, as produced by {@code Stream.toList()}
     */
    public List<ManualChange> toManualChangeList(List<BaseAnnotation> manualChanges, boolean isHint) {

        return manualChanges.stream()
                .map(annotation -> toManualChange(annotation, isHint))
                .toList();
    }


    /**
     * Converts a single annotation. Annotation classes that are not handled below keep whatever
     * {@link ManualChange#from(BaseAnnotation)} produced, without an explicit redaction type being set here.
     */
    private ManualChange toManualChange(BaseAnnotation baseAnnotation, boolean isHint) {

        // NOTE(review): the return values of the with* calls are not used, so this relies on them
        // modifying the instance they are called on - confirm against ManualChange.
        ManualChange converted = ManualChange.from(baseAnnotation);

        if (baseAnnotation instanceof ManualImageRecategorization recategorization) {
            converted.withManualRedactionType(ManualRedactionType.RECATEGORIZE).withChange("type", recategorization.getType());
            return converted;
        }

        if (baseAnnotation instanceof IdRemoval removal) {
            ManualRedactionType removalType = removal.isRemoveFromDictionary() ? ManualRedactionType.REMOVE_FROM_DICTIONARY : ManualRedactionType.REMOVE_LOCALLY;
            converted.withManualRedactionType(removalType);
            return converted;
        }

        if (baseAnnotation instanceof ManualForceRedaction) {
            ManualRedactionType forceType = isHint ? ManualRedactionType.FORCE_HINT : ManualRedactionType.FORCE_REDACT;
            converted.withManualRedactionType(forceType);
            return converted;
        }

        if (baseAnnotation instanceof ManualResizeRedaction resize) {
            converted.withManualRedactionType(ManualRedactionType.RESIZE).withChange("value", resize.getValue());
            return converted;
        }

        if (baseAnnotation instanceof ManualRedactionEntry addedEntry) {
            ManualRedactionType addType = addedEntry.isAddToDictionary() ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY;
            converted.withManualRedactionType(addType).withChange("value", addedEntry.getValue());
            return converted;
        }

        return converted;
    }

}

View File

@ -0,0 +1,58 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Hands the manually added redaction entries of an {@link AnalyzeRequest} to the {@link CustomEntityCreationAdapter}
 * and enriches the entries the adapter reports as not found with the remaining manual changes
 * (force, resize, recategorize, remove, legal basis change) that target them.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ManualRedactionEntryService {

    private final CustomEntityCreationAdapter customEntityCreationAdapter;


    /**
     * Creates entities for the request's manual "add" entries and returns those the adapter could not find.
     * <p>
     * Every manual change whose annotation id equals the id of a not-found entry is added to that entry's
     * manual overwrite, in the order: force redactions, resize redactions, image recategorizations, removals,
     * legal basis changes.
     *
     * @param analyzeRequest the request carrying the manual redactions (which may be {@code null}), file id and dossier id
     * @param document       the document passed through to the adapter
     * @return the not-found entries with their manual changes attached; an empty list when the request has no
     * manual redactions or nothing was reported as not found
     */
    public List<ManualEntity> addManualRedactionEntriesAndReturnNotFoundEntries(AnalyzeRequest analyzeRequest, Document document) {

        ManualRedactions manualRedactions = analyzeRequest.getManualRedactions();
        if (manualRedactions == null) {
            return Collections.emptyList();
        }

        List<ManualEntity> notFoundManualRedactionEntries = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(manualRedactions.getEntriesToAdd(),
                                                                                                                                         document);
        log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());

        if (notFoundManualRedactionEntries.isEmpty()) {
            return Collections.emptyList();
        }

        // Flatten once; the same list is scanned for every not-found entry.
        List<BaseAnnotation> manualChanges = allManualChangesExceptAdd(manualRedactions);
        for (ManualEntity notFoundManualRedactionEntry : notFoundManualRedactionEntries) {
            String entryId = notFoundManualRedactionEntry.getId();
            if (entryId == null) {
                // No change can target an entry without an id.
                continue;
            }
            manualChanges.stream()
                    // entryId is the receiver so a change without an annotation id is skipped instead of throwing
                    .filter(change -> entryId.equals(change.getAnnotationId()))
                    .forEach(change -> notFoundManualRedactionEntry.getManualOverwrite().addChange(change));
        }
        return notFoundManualRedactionEntries;
    }


    /**
     * Collects all manual changes except the "add" entries into one list.
     * Collections that are {@code null} on the given object are skipped.
     */
    private List<BaseAnnotation> allManualChangesExceptAdd(ManualRedactions manualRedactions) {

        return Stream.of(manualRedactions.getForceRedactions(),
                        manualRedactions.getResizeRedactions(),
                        manualRedactions.getImageRecategorization(),
                        manualRedactions.getIdsToRemove(),
                        manualRedactions.getLegalBasisChanges())
                // a missing collection must not abort the whole analysis with a NullPointerException
                .filter(changes -> changes != null)
                .flatMap(Collection::stream)
                .map(baseAnnotation -> (BaseAnnotation) baseAnnotation)
                .toList();
    }

}

View File

@ -6,22 +6,24 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogComment;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -32,49 +34,52 @@ import lombok.extern.slf4j.Slf4j;
public class RedactionLogCreatorService {
private final DictionaryService dictionaryService;
private final ManualChangeFactory manualChangeFactory;
public List<RedactionLogEntry> createRedactionLog(Document document, String dossierTemplateId, List<EntityIdentifier> notFoundManualRedactionEntries) {
public List<RedactionLogEntry> createRedactionLog(Document document,
String dossierTemplateId,
List<ManualEntity> notFoundManualRedactionEntries,
Map<String, List<Comment>> comments) {
List<RedactionLogEntry> entries = new ArrayList<>();
Set<String> processedIds = new HashSet<>();
document.getEntities()
.stream()
.filter(RedactionLogCreatorService::isEntityOrRecommendationType)
.filter(entity -> !entity.isRemoved())
.forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, processedIds, dossierTemplateId)));
document.streamAllImages().filter(image -> !image.isRemoved()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId)));
notFoundManualRedactionEntries.forEach(entityIdentifier -> entries.add(createRedactionLogEntry(entityIdentifier, dossierTemplateId)));
.filter(entity -> !entity.removed())
.forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, dossierTemplateId, comments)));
document.streamAllImages().filter(image -> !image.removed()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId, comments)));
notFoundManualRedactionEntries.forEach(entityIdentifier -> entries.add(createRedactionLogEntry(entityIdentifier, dossierTemplateId, comments)));
return entries;
}
private static boolean isEntityOrRecommendationType(RedactionEntity redactionEntity) {
private static boolean isEntityOrRecommendationType(TextEntity textEntity) {
return redactionEntity.getEntityType() == EntityType.ENTITY || redactionEntity.getEntityType() == EntityType.RECOMMENDATION;
return textEntity.getEntityType() == EntityType.ENTITY || textEntity.getEntityType() == EntityType.RECOMMENDATION;
}
private List<RedactionLogEntry> toRedactionLogEntries(RedactionEntity redactionEntity, Set<String> processedIds, String dossierTemplateId) {
private List<RedactionLogEntry> toRedactionLogEntries(TextEntity textEntity, String dossierTemplateId, Map<String, List<Comment>> comments) {
Set<String> processedIds = new HashSet<>();
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
// Duplicates can exist due table extraction columns over multiple rows.
for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) {
for (RedactionPosition redactionPosition : redactionEntity.getRedactionPositionsPerPage()) {
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(redactionEntity, dossierTemplateId);
if (processedIds.contains(redactionPosition.getId())) {
// Duplicates should be removed. They might exist due to table extraction duplicating cells spanning multiple columns/rows.
if (processedIds.contains(positionOnPage.getId())) {
continue;
}
processedIds.add(positionOnPage.getId());
processedIds.add(redactionPosition.getId());
redactionLogEntry.setId(redactionPosition.getId());
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(textEntity, dossierTemplateId);
redactionLogEntry.setId(positionOnPage.getId());
redactionLogEntry.setComments(buildRedactionLogComments(comments, positionOnPage.getId()));
List<Rectangle> rectanglesPerLine = redactionPosition.getRectanglePerLine()
List<Rectangle> rectanglesPerLine = positionOnPage.getRectanglePerLine()
.stream()
.map(rectangle2D -> RectangleTransformations.toRedactionLogRectangle(rectangle2D, redactionPosition.getPage().getNumber()))
.map(rectangle2D -> toRedactionLogRectangle(rectangle2D, positionOnPage.getPage().getNumber()))
.toList();
redactionLogEntry.setPositions(rectanglesPerLine);
@ -85,88 +90,128 @@ public class RedactionLogCreatorService {
}
private RedactionLogEntry createRedactionLogEntry(RedactionEntity entity, String dossierTemplateId) {
/**
 * Returns the redaction log comments for the given id.
 *
 * @param commentsPerId comments keyed by id
 * @param id            the id whose comments are requested
 * @return the converted comments, or an empty list when the map has no entry for {@code id} or the entry is
 * {@code null} or empty
 */
private List<RedactionLogComment> buildRedactionLogComments(Map<String, List<Comment>> commentsPerId, String id) {

    // A missing key and a key mapped to null both yield null here and are handled the same way.
    List<Comment> commentsForId = commentsPerId.get(id);
    boolean nothingToConvert = commentsForId == null || commentsForId.isEmpty();
    return nothingToConvert ? Collections.emptyList() : toRedactionLogComments(commentsForId);
}
/**
 * Converts each comment to a {@link RedactionLogComment}, keeping the list order.
 */
private List<RedactionLogComment> toRedactionLogComments(List<Comment> comments) {

    return comments.stream()
            .map(comment -> toRedactionLogComment(comment))
            .toList();
}
/**
 * Copies id, user, text, annotation id, file id, date and soft-deleted time of the given comment
 * into a new {@link RedactionLogComment}.
 */
private RedactionLogComment toRedactionLogComment(Comment comment) {

    var id = comment.getId();
    var user = comment.getUser();
    var text = comment.getText();
    var annotationId = comment.getAnnotationId();
    var fileId = comment.getFileId();
    var date = comment.getDate();
    var softDeletedTime = comment.getSoftDeletedTime();
    return new RedactionLogComment(id, user, text, annotationId, fileId, date, softDeletedTime);
}
private RedactionLogEntry createRedactionLogEntry(TextEntity entity, String dossierTemplateId) {
Set<String> referenceIds = new HashSet<>();
entity.getReferences().stream().filter(RedactionEntity::isActive).forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId())));
entity.references().stream().filter(TextEntity::active).forEach(ref -> ref.getPositionsOnPagePerPage().forEach(pos -> referenceIds.add(pos.getId())));
int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 0 : entity.getDeepestFullyContainingNode().getTreeId().get(0);
boolean isHint = isHint(entity.getType(), dossierTemplateId);
return RedactionLogEntry.builder()
.color(getColor(entity.getType(), dossierTemplateId, entity.isApplied()))
.reason(entity.getMatchedRule().getReason())
.legalBasis(entity.getMatchedRule().getLegalBasis())
.value(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())
.color(getColor(entity.getType(), dossierTemplateId, entity.applied()))
.reason(entity.buildReasonWithManualChangeDescriptions())
.legalBasis(entity.legalBasis())
.value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue()))
.type(entity.getType())
.redacted(entity.isApplied())
.isHint(isHint(entity.getType(), dossierTemplateId))
.redacted(entity.applied())
.isHint(isHint)
.isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(entity.getDeepestFullyContainingNode().toString())
.section(entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString()))
.sectionNumber(sectionNumber)
.matchedRule(entity.getMatchedRule().getRuleIdentifier().toString())
.isDictionaryEntry(entity.isDictionaryEntry())
.textAfter(entity.getTextAfter())
.textBefore(entity.getTextBefore())
.startOffset(entity.getBoundary().start())
.endOffset(entity.getBoundary().end())
.startOffset(entity.getTextRange().start())
.endOffset(entity.getTextRange().end())
.isDossierDictionaryEntry(entity.isDossierDictionaryEntry())
.engines(entity.getEngines() != null ? entity.getEngines() : Collections.emptySet())
.reference(referenceIds)
.manualChanges(manualChangeFactory.toManualChangeList(entity.getManualOverwrite().getManualChangeLog(), isHint))
.build();
}
public RedactionLogEntry createRedactionLogEntry(EntityIdentifier entityIdentifier, String dossierTemplateId) {
List<Integer> pageNumbers = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::pageNumber).toList();
List<Rectangle2D> rectanglesPerLine = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList();
public RedactionLogEntry createRedactionLogEntry(ManualEntity manualEntity, String dossierTemplateId, Map<String, List<Comment>> comments) {
String type = manualEntity.getManualOverwrite().getType().orElse(manualEntity.getType());
boolean isHint = isHint(type, dossierTemplateId);
return RedactionLogEntry.builder()
.id(entityIdentifier.getId())
.color(getColor(entityIdentifier.getType(), dossierTemplateId, entityIdentifier.isApplied()))
.reason(entityIdentifier.getReason())
.legalBasis(entityIdentifier.getLegalBasis())
.value(entityIdentifier.getValue())
.type(entityIdentifier.getType())
.redacted(entityIdentifier.isApplied())
.isHint(isHint(entityIdentifier.getType(), dossierTemplateId))
.isRecommendation(entityIdentifier.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(entityIdentifier.getEntityType().equals(EntityType.FALSE_POSITIVE) || entityIdentifier.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(entityIdentifier.getSection())
.id(manualEntity.getId())
.color(getColor(type, dossierTemplateId, manualEntity.applied()))
.reason(manualEntity.buildReasonWithManualChangeDescriptions())
.legalBasis(manualEntity.legalBasis())
.value(manualEntity.getManualOverwrite().getValue().orElse(manualEntity.getValue()))
.type(type)
.redacted(manualEntity.applied())
.isHint(isHint)
.isRecommendation(manualEntity.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(manualEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) || manualEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection()))
.sectionNumber(0)
.matchedRule("ManualRedaction")
.rectangle(entityIdentifier.isRectangle())
.isDictionaryEntry(entityIdentifier.isDictionaryEntry())
.rectangle(manualEntity.isRectangle())
.isDictionaryEntry(manualEntity.isDictionaryEntry())
.isDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry())
.textAfter("")
.textBefore("")
.startOffset(-1)
.endOffset(-1)
.isDossierDictionaryEntry(entityIdentifier.isDossierDictionaryEntry())
.positions(entityIdentifier.getEntityPosition()
.positions(manualEntity.getEntityPosition()
.stream()
.map(entityPosition -> RectangleTransformations.toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber()))
.map(entityPosition -> toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber()))
.collect(Collectors.toList()))
.engines(Collections.emptySet())
.reference(Collections.emptySet())
.manualChanges(manualChangeFactory.toManualChangeList(manualEntity.getManualOverwrite().getManualChangeLog(), isHint))
.comments(buildRedactionLogComments(comments, manualEntity.getId()))
.build();
}
public RedactionLogEntry createRedactionLogEntry(Image image, String dossierTemplateId) {
String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ROOT);
public RedactionLogEntry createRedactionLogEntry(Image image, String dossierTemplateId, Map<String, List<Comment>> comments) {
String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ENGLISH);
boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId);
return RedactionLogEntry.builder()
.id(image.getId())
.color(getColor(image.getImageType().toString().toLowerCase(Locale.ROOT), dossierTemplateId, image.isApplied()))
.color(getColor(imageType, dossierTemplateId, image.applied()))
.isImage(true)
.type(imageType)
.redacted(image.isApplied())
.reason(image.getMatchedRule().getReason())
.legalBasis(image.getMatchedRule().getLegalBasis())
.redacted(image.applied())
.reason(image.buildReasonWithManualChangeDescriptions())
.legalBasis(image.legalBasis())
.matchedRule(image.getMatchedRule().getRuleIdentifier().toString())
.isHint(dictionaryService.isHint(image.getImageType().toString().toLowerCase(Locale.ROOT), dossierTemplateId))
.isHint(isHint)
.isDictionaryEntry(false)
.isRecommendation(false)
.positions(List.of(RectangleTransformations.toRedactionLogRectangle(image.getPosition(), image.getPage().getNumber())))
.positions(List.of(toRedactionLogRectangle(image.getPosition(), image.getPage().getNumber())))
.sectionNumber(image.getTreeId().get(0))
.section(image.getParent().toString())
.section(image.getManualOverwrite().getSection().orElse(image.getParent().toString()))
.imageHasTransparency(image.isTransparent())
.manualChanges(manualChangeFactory.toManualChangeList(image.getManualOverwrite().getManualChangeLog(), isHint))
.comments(buildRedactionLogComments(comments, image.getId()))
.build();
}
@ -186,4 +231,13 @@ public class RedactionLogCreatorService {
return dictionaryService.isHint(type, dossierTemplateId);
}
/**
 * Converts an AWT rectangle into a redaction log {@link Rectangle} on the given page.
 * <p>
 * The resulting rectangle is anchored at {@code (minX, minY + height)} and carries the original width and the
 * negated height.
 *
 * @param rectangle2D the rectangle to convert
 * @param pageNumber  the page number stored on the result
 */
private Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) {

    float anchorX = (float) rectangle2D.getMinX();
    // the sum is computed in double precision and narrowed afterwards
    float anchorY = (float) (rectangle2D.getMinY() + rectangle2D.getHeight());
    float width = (float) rectangle2D.getWidth();
    float negatedHeight = -(float) rectangle2D.getHeight();

    Point anchor = new Point(anchorX, anchorY);
    return new Rectangle(anchor, width, negatedHeight, pageNumber);
}
}

View File

@ -3,7 +3,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils;
import java.util.Set;
import java.util.regex.Pattern;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.experimental.UtilityClass;
@ -14,7 +14,7 @@ import lombok.extern.slf4j.Slf4j;
public final class SeparatorUtils {
private final static Pattern punctuationPattern = Pattern.compile("\\p{Punct}");
private final static Set<Character> quotes = Set.of('\'', '\u0022', '\u00AB', '\u00BB', '\u2018', '\u2019', '\u201A', '\u201C', '\u201D', '\u201E', '\u2039', '\u203A');
private final static Set<Character> quotes = Set.of('\'', '"', '«', '»', '', '', '', '“', '”', '„', '', '');
private final static Set<Integer> japaneseAltPunctuationMarks = Set.of(65288, 65289, 65294, 65339, 65341, 65371, 65373, 65375, 65376, 12443, 12444, 65309, 65306);
@ -32,9 +32,9 @@ public final class SeparatorUtils {
}
public static boolean isWhiteSpacesOrSeparatorsOnly(TextBlock textBlock, Boundary boundary) {
public static boolean isWhiteSpacesOrSeparatorsOnly(TextBlock textBlock, TextRange textRange) {
String stringWithoutWhiteSpace = textBlock.subSequence(boundary).toString().replace(" ", "");
String stringWithoutWhiteSpace = textBlock.subSequence(textRange).toString().replace(" ", "");
int numberOfSeparators = 0;
for (int i = 0; i < stringWithoutWhiteSpace.length(); i++) {
if (isSeparator(stringWithoutWhiteSpace.charAt(i))) {
@ -45,25 +45,25 @@ public final class SeparatorUtils {
}
public static boolean boundaryIsSurroundedBySeparators(TextBlock textBlock, Boundary boundary) {
public static boolean boundaryIsSurroundedBySeparators(TextBlock textBlock, TextRange textRange) {
return validateStart(textBlock, boundary) && validateEnd(textBlock, boundary) && !isWhiteSpacesOrSeparatorsOnly(textBlock, boundary);
return validateStart(textBlock, textRange) && validateEnd(textBlock, textRange) && !isWhiteSpacesOrSeparatorsOnly(textBlock, textRange);
}
private static boolean validateEnd(TextBlock textBlock, Boundary boundary) {
private static boolean validateEnd(TextBlock textBlock, TextRange textRange) {
return boundary.end() == textBlock.getBoundary().end() ||//
SeparatorUtils.isSeparator(textBlock.charAt(boundary.end())) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(boundary.end() - 1));
return textRange.end() == textBlock.getTextRange().end() ||//
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1));
}
private static boolean validateStart(TextBlock textBlock, Boundary boundary) {
private static boolean validateStart(TextBlock textBlock, TextRange textRange) {
return boundary.start() == textBlock.getBoundary().start() ||//
SeparatorUtils.isSeparator(textBlock.charAt(boundary.start() - 1)) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(boundary.start()));
return textRange.start() == textBlock.getTextRange().start() ||//
SeparatorUtils.isSeparator(textBlock.charAt(textRange.start() - 1)) ||//
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.start()));
}
}

View File

@ -5,10 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.when;
import static org.wildfly.common.Assert.assertTrue;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
@ -19,7 +16,6 @@ import java.nio.file.Paths;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -31,7 +27,6 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
@ -62,21 +57,13 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSON
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
@ -93,11 +80,6 @@ import lombok.SneakyThrows;
public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules.drl");
@Autowired
private EntityEnrichmentService entityEnrichmentService;
@Autowired
private DroolsExecutionService droolsExecutionService;
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@ -113,11 +95,6 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
}
}
@BeforeEach
public void invalidateCaches() {
// droolsExecutionService.invalidateKieContainerCache();
}
@BeforeEach
@ -234,7 +211,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
@Test
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf");
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
System.out.println("Finished structure analysis");
@ -393,7 +370,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
continue loop;
}
if (redactionLogEntry.getSectionNumber() == section.getTreeId().get(0)) {
String value = section.getTextBlock().subSequence(new Boundary(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())).toString();
String value = section.getTextBlock().subSequence(new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())).toString();
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
correctFound++;
} else {
@ -542,7 +519,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
List<String> valuesInDocument = redactionLog.getRedactionLogEntry()
.stream()
.filter(e -> !e.isImage())
.map(redactionLogEntry -> new Boundary(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset()))
.map(redactionLogEntry -> new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset()))
.map(boundary -> documentGraph.getTextBlock().subSequence(boundary).toString())
.toList();
List<String> valuesInRedactionLog = redactionLog.getRedactionLogEntry().stream().filter(e -> !e.isImage()).map(RedactionLogEntry::getValue).toList();
@ -697,75 +674,6 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
}
/**
 * Checks that manually resizing an existing redaction so that it spans a whole sentence keeps the
 * resized entry under its original id and leaves only one active entry for each of the two names
 * that occur inside the enlarged span.
 *
 * <p>Flow: analyze the crafted document, expect two log entries each for "Desiree" and "Melanie",
 * resize the "Desiree" entry with the highest start offset to the expanded sentence, reanalyze,
 * write an annotated PDF to the temporary directory (the file itself is not asserted on), and
 * finally assert on the resulting redaction log.
 */
@Test
@SneakyThrows
public void testManualResizeRedactionRemovesContainedEntities() {

    String filePath = "files/new/crafted document.pdf";
    AnalyzeRequest request = uploadFileToStorage(filePath);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    // The returned AnalyzeResult is not needed; the redaction log is read back from storage instead.
    analyzeService.analyze(request);

    String testEntityValue1 = "Desiree";
    String testEntityValue2 = "Melanie";
    RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
    assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
    assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count());

    Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
    String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum.";
    EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
    // orElseThrow() fails with the same NoSuchElementException as get(), but states the intent explicitly.
    RedactionEntity expandedEntity = entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().orElseThrow();

    // Resize the occurrence of the first test value that starts latest in the document.
    String idToResize = redactionLog.getRedactionLogEntry()
            .stream()
            .filter(entry -> entry.getValue().equals(testEntityValue1))
            .max(Comparator.comparingInt(RedactionLogEntry::getStartOffset))
            .orElseThrow()
            .getId();

    // NOTE(review): page number 3 is hard-coded; presumably the page of the expanded sentence in the crafted document.
    List<Rectangle> resizedPositions = expandedEntity.getRedactionPositionsPerPage()
            .get(0)
            .getRectanglePerLine()
            .stream()
            .map(rectangle2D -> toAnnotationRectangle(rectangle2D, 3))
            .toList();

    ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
            .annotationId(idToResize)
            .value(expandedEntityKeyword)
            .positions(resizedPositions)
            .status(AnnotationStatus.APPROVED)
            .build();
    ManualRedactions manualRedactions = new ManualRedactions();
    manualRedactions.getResizeRedactions().add(manualResizeRedaction);
    request.setManualRedactions(manualRedactions);

    analyzeService.reanalyze(request);
    redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    String annotatedFileName = Paths.get(filePath).getFileName().toString().replace(".pdf", "_annotated2.pdf");
    File tmpFile = Paths.get(OsUtils.getTemporaryDirectory(), annotatedFileName).toFile();
    AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
    try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) {
        fileOutputStream.write(annotateResponse.getDocument());
    }

    RedactionLogEntry resizedEntry = redactionLog.getRedactionLogEntry()
            .stream()
            .filter(entry -> entry.getValue().equals(expandedEntityKeyword))
            .findFirst()
            .orElseThrow();
    // assertEquals reports expected vs. actual on failure, unlike assertTrue(a.equals(b)).
    assertEquals(ChangeType.CHANGED, resizedEntry.getChanges().get(resizedEntry.getChanges().size() - 1).getType());
    assertEquals(idToResize, resizedEntry.getId());
    assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
    assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.lastChangeIsRemoved()).count());
}
/**
 * Converts an AWT rectangle into the persistence-service annotation rectangle for the given page.
 *
 * <p>The constructor receives, in this order: the rectangle's max X, its max Y minus its height,
 * its width, its negated height, and the page number. All geometry values are narrowed to
 * {@code float} before any arithmetic.
 *
 * @param rectangle2D source rectangle
 * @param pageNumber  page number passed through to the annotation rectangle
 * @return the annotation rectangle built from the values described above
 */
private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {

    float maxX = (float) rectangle2D.getMaxX();
    float height = (float) rectangle2D.getHeight();
    // Subtraction is done on floats, exactly as (float) maxY - (float) height.
    float maxYMinusHeight = (float) rectangle2D.getMaxY() - height;
    float width = (float) rectangle2D.getWidth();

    return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle(maxX, maxYMinusHeight, width, -height, pageNumber);
}
@Test
public void testTableRedactionWithCvTableService() throws IOException {
@ -967,71 +875,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
/**
 * End-to-end run of manual redactions against the "Single Table" minimal example.
 *
 * <p>First pass (analyze): a declined id removal, an approved force redaction and three comments.
 * Second pass (reanalyze): additionally a requested manual redaction entry, the same id removal
 * now approved, and an approved legal basis change. Afterwards the annotated PDF is written to
 * the temporary directory and the duration and page count are printed.
 *
 * <p>The method contains no assertions; it passes when none of the calls throws.
 */
@Test
public void testManualRedaction() throws IOException {

    System.out.println("testManualRedaction");
    long startedAt = System.currentTimeMillis();

    String pdfFile = "files/Minimal Examples/Single Table.pdf";
    String removalTargetId = "5b940b2cb401ed9f5be6fc24f6e77bcf";
    String forcedAnnotationId = "675eba69b0c2917de55462c817adaa05";

    ManualRedactions manualRedactions = new ManualRedactions();
    String manualAddId = UUID.randomUUID().toString();
    Comment testComment = Comment.builder().date(OffsetDateTime.now()).user("TEST_USER").text("This is a comment test").build();

    manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId(removalTargetId).fileId("fileId").status(AnnotationStatus.DECLINED).build()));
    manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder()
            .annotationId(forcedAnnotationId)
            .fileId("fileId")
            .legalBasis("Something")
            .status(AnnotationStatus.APPROVED)
            .build()));

    // Attach the same comment to two fixed annotation ids and to the manual entry added later.
    for (String commentedId : List.of("e5be0f1d941bbb92a068e198648d06c4", "0836727c3508a0b2ea271da69c04cc2f", manualAddId)) {
        manualRedactions.getComments().put(commentedId, List.of(testComment));
    }

    ManualRedactionEntry entryToAdd = new ManualRedactionEntry();
    entryToAdd.setAnnotationId(manualAddId);
    entryToAdd.setFileId("fileId");
    entryToAdd.setStatus(AnnotationStatus.REQUESTED);
    entryToAdd.setType("name");
    entryToAdd.setValue("O'Loughlin C.K.");
    entryToAdd.setReason("Manual Redaction");
    entryToAdd.setPositions(List.of(
            Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
            Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));

    // First pass: full analysis with the initial manual redactions.
    AnalyzeRequest request = uploadFileToStorage(pdfFile);
    request.setManualRedactions(manualRedactions);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    AnalyzeResult result = analyzeService.analyze(request);

    // Second pass: the request still holds the same ManualRedactions instance, so these changes are seen by reanalyze.
    manualRedactions.getEntriesToAdd().add(entryToAdd);
    manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId(removalTargetId).fileId("fileId").status(AnnotationStatus.APPROVED).build()));
    manualRedactions.setLegalBasisChanges(Set.of(ManualLegalBasisChange.builder()
            .annotationId(forcedAnnotationId)
            .fileId("fileId")
            .legalBasis("Manual Legal Basis Change")
            .status(AnnotationStatus.APPROVED)
            .build()));
    analyzeService.reanalyze(request);

    // The log is fetched but not inspected; the call is kept so a missing log still surfaces as an error.
    redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    AnnotateRequest annotateRequest = AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build();
    AnnotateResponse annotateResponse = annotationService.annotate(annotateRequest);
    try (FileOutputStream annotatedPdf = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
        annotatedPdf.write(annotateResponse.getDocument());
    }

    long finishedAt = System.currentTimeMillis();
    System.out.println("duration: " + (finishedAt - startedAt));
    System.out.println("numberOfPages: " + result.getNumberOfPages());
}
@Test
public void phantomCellsDocumentTest() throws IOException {
public void phantomCellsDocumentTest() {
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/Phantom Cells.pdf");

View File

@ -180,9 +180,6 @@ public class AnnotationService {
/**
 * Builds the text content of the annotation for a redaction log entry.
 *
 * <p>Local manual redactions get a short "Manual Redaction" text naming the section. All other
 * entries get the type, the matched rule, the reason, the legal basis and the section.
 *
 * @param redactionLogEntry entry to describe
 * @return the annotation content string
 */
private String createAnnotationContent(RedactionLogEntry redactionLogEntry) {

    if (!redactionLogEntry.isLocalManualRedaction()) {
        String ruleSummary = redactionLogEntry.getType() + " \nRule " + redactionLogEntry.getMatchedRule() + " matched\n\n";
        String justification = redactionLogEntry.getReason() + "\n\nLegal basis:" + redactionLogEntry.getLegalBasis();
        return ruleSummary + justification + "\n\nIn section: \"" + redactionLogEntry.getSection() + "\"";
    }
    return "\nManual Redaction\n\nIn Section : \"" + redactionLogEntry.getSection() + "\"";
}

View File

@ -5,16 +5,16 @@ import static org.junit.jupiter.api.Assertions.assertThrows;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
public class RedactionEntityTest {
public class TextEntityTest {
@Test
public void testMatchedRule() {
RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);
TextEntity entity = TextEntity.initialEntityNode(new TextRange(1, 100), "PII", EntityType.ENTITY);
entity.skip("CBI.1.0", "");
entity.skip("CBI.2.0", "");
entity.skip("CBI.3.0", "");
@ -25,24 +25,11 @@ public class RedactionEntityTest {
}
/**
 * Skips one entity with a MAN rule followed by several CBI rules and expects the MAN rule to be
 * reported as the matched rule, with matched rule unit 2.
 * NOTE(review): presumably manual (MAN) rules take precedence over all others; confirm against
 * the entity's rule-selection logic.
 */
@Test
public void testMatchedRuleWithManualRedaction() {

    Boundary range = new Boundary(1, 100);
    RedactionEntity piiEntity = RedactionEntity.initialEntityNode(range, "PII", EntityType.ENTITY);

    String[] skippedRules = {"MAN.2.0", "CBI.2.0", "CBI.3.0", "CBI.4.1", "CBI.4.0"};
    for (String ruleIdentifier : skippedRules) {
        piiEntity.skip(ruleIdentifier, "");
    }

    String matchedRuleIdentifier = piiEntity.getMatchedRule().getRuleIdentifier().toString();
    assertThat(matchedRuleIdentifier).isEqualTo("MAN.2.0");
    assertThat(piiEntity.getMatchedRuleUnit()).isEqualTo(2);
}
@Test
public void testMatchedRuleWithNonsense() {
RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);
TextEntity entity = TextEntity.initialEntityNode(new TextRange(1, 100), "PII", EntityType.ENTITY);
assertThrows(IllegalArgumentException.class, () -> {
entity.skip("", "");
});

View File

@ -1,85 +0,0 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Collections;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link Boundary}. Every test runs against a fixture created as
 * {@code new Boundary(10, 100)}.
 *
 * <p>The assertions pin down that the end value is exclusive for single-index containment and for
 * intersection: index 100 is not contained, and ranges that merely touch the fixture at 10 or at
 * 100 do not intersect it.
 */
class BoundaryTest {

    Boundary startBoundary;


    @BeforeEach
    void setUp() {

        startBoundary = new Boundary(10, 100);
    }


    /** Covers containment of single indices, of other boundaries, and of the two-int overload. */
    @Test
    void testContains() {

        // Single indices: inside values are contained; values before the start, at the end value, or beyond are not.
        assertTrue(startBoundary.contains(11));
        assertTrue(startBoundary.contains(50));
        assertFalse(startBoundary.contains(9));
        assertFalse(startBoundary.contains(100));
        assertFalse(startBoundary.contains(150));
        assertFalse(startBoundary.contains(-123));

        // Other boundaries: a strict sub-range, the fixture itself and an empty range inside are contained.
        assertTrue(startBoundary.contains(new Boundary(11, 99)));
        assertTrue(startBoundary.contains(new Boundary(10, 100)));
        assertTrue(startBoundary.contains(new Boundary(11, 11)));

        // Two-int overload: the empty range (100, 100) is accepted although the single index 100 is not.
        assertFalse(startBoundary.contains(9, 100));
        assertTrue(startBoundary.contains(100, 100));
        assertFalse(startBoundary.contains(100, 101));
        assertFalse(startBoundary.contains(150, 151));
    }


    /** Covers overlapping, touching and disjoint ranges. */
    @Test
    void testIntersects() {

        // Ranges that overlap the fixture by at least one index.
        assertTrue(startBoundary.intersects(new Boundary(1, 11)));
        assertTrue(startBoundary.intersects(new Boundary(11, 12)));
        assertTrue(startBoundary.intersects(new Boundary(11, 100)));

        // Ranges that only touch the fixture's start or end value, or lie entirely outside.
        assertFalse(startBoundary.intersects(new Boundary(100, 101)));
        assertFalse(startBoundary.intersects(new Boundary(9, 10)));
        assertFalse(startBoundary.intersects(new Boundary(0, 1)));
        assertFalse(startBoundary.intersects(new Boundary(1000, 1001)));

        // Ranges reaching past the fixture's end, and one enclosing the whole fixture.
        assertTrue(startBoundary.intersects(new Boundary(99, 101)));
        assertTrue(startBoundary.intersects(new Boundary(9, 101)));
    }


    /** Covers splitting at inner indices, at the start, with no indices, and at invalid indices. */
    @Test
    void testSplit() {

        assertEquals(4, startBoundary.split(List.of(12, 40, 90)).size());
        assertEquals(List.of(new Boundary(10, 12), new Boundary(12, 40), new Boundary(40, 90), new Boundary(90, 100)), startBoundary.split(List.of(12, 40, 90)));
        assertEquals(List.of(new Boundary(10, 40), new Boundary(40, 100)), startBoundary.split(List.of(40)));

        // No split index, or a split index equal to the start, yields a single piece.
        assertEquals(1, startBoundary.split(Collections.emptyList()).size());
        assertEquals(1, startBoundary.split(List.of(startBoundary.start())).size());

        // A split index before the start or equal to the end value is rejected.
        assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(Collections.singletonList(0)));
        assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(Collections.singletonList(100)));
        assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(List.of(12, 40, 100)));
    }


    /** Covers ordering of a range entirely before and a range entirely after the fixture. */
    @Test
    void testCompareTo() {

        Boundary beforeBoundary = new Boundary(1, 8);
        Boundary afterBoundary = new Boundary(101, 102);

        // The expected values are exactly -1 and 1, not merely negative and positive.
        assertEquals(-1, beforeBoundary.compareTo(startBoundary));
        assertEquals(1, afterBoundary.compareTo(startBoundary));
    }

}

View File

@ -17,7 +17,7 @@ import org.mockito.MockitoAnnotations;
import org.springframework.beans.factory.annotation.Autowired;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
@ -67,22 +67,22 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
Document document = buildGraph("files/new/crafted document.pdf");
String type = "CBI_author";
assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent());
assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent());
assertTrue(entityCreationService.byBoundary(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
assertTrue(entityCreationService.byBoundary(new TextRange(0, 10), type, EntityType.ENTITY, document).isPresent());
assertEquals(1, document.getEntities().size());
verify(kieSession, times(1)).insert(any(RedactionEntity.class));
verify(kieSession, times(1)).insert(any(TextEntity.class));
}
// Test helper: looks up searchTerm in the document's text block, creates an ENTITY-typed entity
// of type "123" spanning [start, start + searchTerm.length()), adds it to the document graph via
// entityCreationService and returns it.
private RedactionEntity createAndInsertEntity(Document document, String searchTerm) {
private TextEntity createAndInsertEntity(Document document, String searchTerm) {
int start = document.getTextBlock().indexOf(searchTerm);
// Presumably indexOf returns -1 when the term is absent; a Java assert is only evaluated when assertions are enabled (-ea).
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY);
entityCreationService.addEntityToGraph(redactionEntity, document);
return redactionEntity;
TextRange textRange = new TextRange(start, start + searchTerm.length());
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY);
entityCreationService.addEntityToGraph(textEntity, document);
return textEntity;
}
@ -91,18 +91,18 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
Document document = buildGraph("files/new/crafted document");
String searchTerm = "Clarissa";
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
assertEquals("Expand to Hint ", redactionEntity.getTextBefore());
assertEquals("s Donut ←", redactionEntity.getTextAfter());
assertEquals(searchTerm, redactionEntity.getValue());
assertEquals("Expand to Hint ", textEntity.getTextBefore());
assertEquals("s Donut ←", textEntity.getTextAfter());
assertEquals(searchTerm, textEntity.getValue());
assertEquals("Rule 5: Do not redact genitive CBI_authors (Entries based on Dict) ",
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(3, redactionEntity.getIntersectingNodes().size());
assertEquals(5, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(Paragraph.class, redactionEntity.getDeepestFullyContainingNode());
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(3, textEntity.getIntersectingNodes().size());
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
}
@ -111,17 +111,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
Document document = buildGraph("files/new/crafted document");
String searchTerm = "Rule 39:";
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
assertEquals("", redactionEntity.getTextBefore());
assertEquals(" Purity Hint", redactionEntity.getTextAfter());
assertEquals(searchTerm, redactionEntity.getValue());
assertEquals("Rule 39: Purity Hint ", redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(3, redactionEntity.getIntersectingNodes().size());
assertEquals(6, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(Headline.class, redactionEntity.getDeepestFullyContainingNode());
assertEquals("", textEntity.getTextBefore());
assertEquals(" Purity Hint", textEntity.getTextAfter());
assertEquals(searchTerm, textEntity.getValue());
assertEquals("Rule 39: Purity Hint ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(3, textEntity.getIntersectingNodes().size());
assertEquals(6, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
}
@ -130,17 +130,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
Document document = buildGraph("files/new/crafted document");
String searchTerm = "1998";
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
assertEquals("", redactionEntity.getTextBefore());
assertEquals("", redactionEntity.getTextAfter());
assertEquals(searchTerm, redactionEntity.getValue());
assertEquals("Rule 6-11 (Authors Table) ", redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(5, redactionEntity.getIntersectingNodes().size());
assertEquals(15, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(TableCell.class, redactionEntity.getDeepestFullyContainingNode());
assertEquals("", textEntity.getTextBefore());
assertEquals("", textEntity.getTextAfter());
assertEquals(searchTerm, textEntity.getValue());
assertEquals("Rule 6-11 (Authors Table) ", textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(5, textEntity.getIntersectingNodes().size());
assertEquals(15, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
}
@ -212,19 +212,19 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
String searchTerm = "Cucurbit";
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
assertEquals("except Cranberry; Vegetable, ", redactionEntity.getTextBefore());
assertEquals(", Group 9;", redactionEntity.getTextAfter());
assertEquals("except Cranberry; Vegetable, ", textEntity.getTextBefore());
assertEquals(", Group 9;", textEntity.getTextAfter());
assertEquals("1.1.4 Evaluations carried out under other regulatory contexts ",
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(searchTerm, redactionEntity.getValue());
assertEquals(3, redactionEntity.getIntersectingNodes().size());
assertEquals(5, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
assertInstanceOf(Paragraph.class, redactionEntity.getDeepestFullyContainingNode());
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(searchTerm, textEntity.getValue());
assertEquals(3, textEntity.getIntersectingNodes().size());
assertEquals(5, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
assertInstanceOf(Paragraph.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
}
@ -238,21 +238,21 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
start = document.getTextBlock().indexOf(searchTerm, start + 1);
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY);
entityCreationService.addEntityToGraph(redactionEntity, document);
TextRange textRange = new TextRange(start, start + searchTerm.length());
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY);
entityCreationService.addEntityToGraph(textEntity, document);
assertEquals("2.6.1 Summary of ", redactionEntity.getTextBefore());
assertEquals(" and excretion in", redactionEntity.getTextAfter());
assertEquals("2.6.1 Summary of ", textEntity.getTextBefore());
assertEquals(" and excretion in", textEntity.getTextAfter());
assertEquals("2.6.1 Summary of absorption, distribution, metabolism and excretion in mammals ",
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(searchTerm, redactionEntity.getValue());
assertEquals(3, redactionEntity.getIntersectingNodes().size());
assertEquals(4, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
assertInstanceOf(Headline.class, redactionEntity.getDeepestFullyContainingNode());
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertEquals(searchTerm, textEntity.getValue());
assertEquals(3, textEntity.getIntersectingNodes().size());
assertEquals(4, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
assertInstanceOf(Headline.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
}
@ -261,32 +261,32 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
Document document = buildGraph("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
String searchTerm = "N-deacetylation product";
RedactionEntity redactionEntity = createAndInsertEntity(document, searchTerm);
TextEntity textEntity = createAndInsertEntity(document, searchTerm);
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", redactionEntity.getTextBefore());
assertEquals(" of metabolite of", redactionEntity.getTextAfter());
assertEquals(searchTerm, redactionEntity.getValue());
assertEquals(4, redactionEntity.getIntersectingNodes().size());
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", textEntity.getTextBefore());
assertEquals(" of metabolite of", textEntity.getTextAfter());
assertEquals(searchTerm, textEntity.getValue());
assertEquals(4, textEntity.getIntersectingNodes().size());
assertEquals("Table 2.7-1: List of substances and metabolites and related structural formula ",
redactionEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertTrue(redactionEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54));
assertEquals(26, redactionEntity.getDeepestFullyContainingNode().getNumberOnPage());
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
assertTrue(textEntity.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 54));
assertEquals(26, textEntity.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(TableCell.class, redactionEntity.getDeepestFullyContainingNode());
assertInstanceOf(TableCell.class, textEntity.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
}
// Asserts that, for the text block of every node intersecting the entity, indexOf(searchTerm, from)
// with from = start of the deepest fully containing node equals the entity's own start offset.
// This might fail if an entity with the same name exists twice in the deepest containing node.
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, RedactionEntity redactionEntity) {
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, TextEntity textEntity) {
List<Integer> paragraphStart = redactionEntity.getIntersectingNodes().stream()//
List<Integer> paragraphStart = textEntity.getIntersectingNodes().stream()//
.map(SemanticNode::getTextBlock)//
.map(textBlock -> textBlock.indexOf(searchTerm, redactionEntity.getDeepestFullyContainingNode().getBoundary().start()))//
.map(textBlock -> textBlock.indexOf(searchTerm, textEntity.getDeepestFullyContainingNode().getTextRange().start()))//
.toList();
// Every intersecting node must report the same offset as the entity itself.
paragraphStart.forEach(nodeStart -> assertEquals(redactionEntity.getBoundary().start(), nodeStart));
paragraphStart.forEach(nodeStart -> assertEquals(textEntity.getTextRange().start(), nodeStart));
}
@ -296,17 +296,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
RedactionEntity redactionEntity = RedactionEntity.initialEntityNode(boundary, "123", EntityType.ENTITY);
entityCreationService.addEntityToGraph(redactionEntity, document);
TextRange textRange = new TextRange(start, start + searchTerm.length());
TextEntity textEntity = TextEntity.initialEntityNode(textRange, "123", EntityType.ENTITY);
entityCreationService.addEntityToGraph(textEntity, document);
Page pageNode = document.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();
assertEquals(redactionEntity.getValue(), searchTerm);
assertTrue(pageNode.getEntities().contains(redactionEntity));
assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(redactionEntity)));
assertTrue(redactionEntity.getPages().contains(pageNode));
assertSameOffsetInAllIntersectingNodes(searchTerm, redactionEntity);
assertTrue(redactionEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(redactionEntity)));
assertEquals(textEntity.getValue(), searchTerm);
assertTrue(pageNode.getEntities().contains(textEntity));
assertTrue(document.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(textEntity)));
assertTrue(textEntity.getPages().contains(pageNode));
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
assertTrue(textEntity.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(textEntity)));
}
}

View File

@ -33,7 +33,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
@ -136,7 +136,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID);
long dictionarySearchStart = System.currentTimeMillis();
List<RedactionEntity> foundEntities = new LinkedList<>();
List<TextEntity> foundEntities = new LinkedList<>();
for (DictionaryModel model : dictionary.getDictionaryModels()) {
findEntitiesWithSearchImplementation(document, model.getEntriesSearch(), EntityType.ENTITY, foundEntities, model.getType());
findEntitiesWithSearchImplementation(document, model.getFalsePositiveSearch(), EntityType.FALSE_POSITIVE, foundEntities, model.getType());
@ -210,7 +210,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
float totalSearchTime = 0;
float totalGraphTime = 0;
float totalInsertTime = 0;
List<RedactionEntity> foundEntities = new LinkedList<>();
List<TextEntity> foundEntities = new LinkedList<>();
for (int i = 0; i < numberOfRuns; i++) {
foundEntities = new LinkedList<>();
@ -256,9 +256,9 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
for (Page page : document.getPages()) {
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
.stream()
.filter(entityNode -> !entityNode.isRemoved())
.filter(RedactionEntity::isApplied)
.flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream())
.filter(entityNode -> !entityNode.removed())
.filter(TextEntity::applied)
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
.toList();
@ -270,9 +270,9 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
for (Page page : document.getPages()) {
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
.stream()
.filter(entityNode -> !entityNode.isRemoved())
.filter(entityNode -> !entityNode.isApplied())
.flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream())
.filter(entityNode -> !entityNode.removed())
.filter(entityNode -> !entityNode.applied())
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage().stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
.toList();
@ -289,14 +289,14 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
/**
 * Runs {@code searchImplementation} over the text block of {@code document} and appends one
 * initial entity node per accepted hit to {@code foundEntities}.
 *
 * <p>A hit is accepted only if {@code boundaryIsSurroundedBySeparators} returns true for it.
 *
 * @param document             document whose text block is searched
 * @param searchImplementation search to execute
 * @param entityType           entity type given to every created entity
 * @param foundEntities        output list; created entities are added to it
 * @param type                 type name given to every created entity
 */
private void findEntitiesWithSearchImplementation(Document document,
SearchImplementation searchImplementation,
EntityType entityType,
List<RedactionEntity> foundEntities,
List<TextEntity> foundEntities,
String type) {
TextBlock textBlock = document.getTextBlock();
searchImplementation.getBoundaries(textBlock, textBlock.getBoundary())
searchImplementation.getBoundaries(textBlock, textBlock.getTextRange())
.stream()
.filter(boundary -> boundaryIsSurroundedBySeparators(textBlock, boundary))
.map(bounds -> RedactionEntity.initialEntityNode(bounds, type, entityType))
.map(bounds -> TextEntity.initialEntityNode(bounds, type, entityType))
.forEach(foundEntities::add);
}

View File

@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
@ -27,7 +28,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo
import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -88,9 +89,9 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
// IMPORTANT: always use the graph which is mapped from the DocumentData, since rounding errors occur during storage.
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(request.getDossierId(), request.getFileId()));
List<EntityIdentifier> notFoundManualRedactionEntries = redactionLogAdapter.toRedactionEntity(originalRedactionLog, document);
List<ManualEntity> notFoundManualRedactionEntries = redactionLogAdapter.toRedactionEntity(originalRedactionLog, document);
var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries);
var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries, Collections.emptyMap());
Map<String, RedactionLogEntry> migratedIds = migratedRedactionLogEntries.stream().collect(toMap(RedactionLogEntry::getId, Functions.identity()));
Map<String, RedactionLogEntry> newIds = newRedactionLog.getRedactionLogEntry().stream().collect(toMap(RedactionLogEntry::getId, Functions.identity()));

View File

@ -8,7 +8,7 @@ import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
@ -27,7 +27,7 @@ public class SearchImplementationTest extends BuildDocumentIntegrationTest {
SearchImplementation searchImplementation = new SearchImplementation(List.of("mydossierredaction"), true);
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
List<RedactionEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList();
List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "dossier_redaction", EntityType.ENTITY, document).toList();
assertEquals(2, entities.size());
}

View File

@ -0,0 +1,85 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Collections;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link TextRange}: containment, intersection, splitting and ordering.
 *
 * <p>All tests run against the fixture range created with {@code new TextRange(10, 100)}.
 * The assertions below expect the start index to be inside the range and the end index
 * to be outside of it.
 */
class TextRangeTest {

    TextRange startTextRange;


    /** Creates a fresh fixture range (10, 100) before every test. */
    @BeforeEach
    void setUp() {

        startTextRange = new TextRange(10, 100);
    }


    /** Covers all three {@code contains} overloads: single index, range object and (start, end) pair. */
    @Test
    void testContains() {

        // single index
        assertTrue(startTextRange.contains(11));
        assertTrue(startTextRange.contains(50));
        assertFalse(startTextRange.contains(9));
        assertFalse(startTextRange.contains(100));
        assertFalse(startTextRange.contains(150));
        assertFalse(startTextRange.contains(-123));

        // range object, including the range itself and an empty range inside of it
        assertTrue(startTextRange.contains(new TextRange(11, 99)));
        assertTrue(startTextRange.contains(new TextRange(10, 100)));
        assertTrue(startTextRange.contains(new TextRange(11, 11)));

        // (start, end) pair; the empty pair (100, 100) sitting on the end boundary is expected to be contained
        assertFalse(startTextRange.contains(9, 100));
        assertTrue(startTextRange.contains(100, 100));
        assertFalse(startTextRange.contains(100, 101));
        assertFalse(startTextRange.contains(150, 151));
    }


    /** Ranges that merely touch the fixture at either boundary must not count as intersecting. */
    @Test
    void testIntersects() {

        assertTrue(startTextRange.intersects(new TextRange(1, 11)));
        assertTrue(startTextRange.intersects(new TextRange(11, 12)));
        assertTrue(startTextRange.intersects(new TextRange(11, 100)));

        // touching or disjoint ranges
        assertFalse(startTextRange.intersects(new TextRange(100, 101)));
        assertFalse(startTextRange.intersects(new TextRange(9, 10)));
        assertFalse(startTextRange.intersects(new TextRange(0, 1)));
        assertFalse(startTextRange.intersects(new TextRange(1000, 1001)));

        // overlapping the end, and enclosing the whole fixture
        assertTrue(startTextRange.intersects(new TextRange(99, 101)));
        assertTrue(startTextRange.intersects(new TextRange(9, 101)));
    }


    /**
     * Splitting yields consecutive sub-ranges; split indices outside the range, or equal to its end,
     * are expected to raise {@link IndexOutOfBoundsException}.
     */
    @Test
    void testSplit() {

        assertEquals(4, startTextRange.split(List.of(12, 40, 90)).size());
        assertEquals(List.of(new TextRange(10, 12), new TextRange(12, 40), new TextRange(40, 90), new TextRange(90, 100)), startTextRange.split(List.of(12, 40, 90)));
        assertEquals(List.of(new TextRange(10, 40), new TextRange(40, 100)), startTextRange.split(List.of(40)));

        // no split index, or a split exactly at the start, leaves a single range
        assertEquals(1, startTextRange.split(Collections.emptyList()).size());
        assertEquals(1, startTextRange.split(List.of(startTextRange.start())).size());

        assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(Collections.singletonList(0)));
        assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(Collections.singletonList(100)));
        assertThrows(IndexOutOfBoundsException.class, () -> startTextRange.split(List.of(12, 40, 100)));
    }


    /**
     * Checks the ordering of disjoint ranges. Only the sign of the result is asserted, because the
     * {@link Comparable#compareTo(Object)} contract does not promise the exact values -1 and 1.
     */
    @Test
    void testCompareTo() {

        TextRange beforeTextRange = new TextRange(1, 8);
        TextRange afterTextRange = new TextRange(101, 102);

        assertTrue(beforeTextRange.compareTo(startTextRange) < 0);
        assertTrue(afterTextRange.compareTo(startTextRange) > 0);
    }

}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
package com.iqser.red.service.redaction.v1.server.manualchanges;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.when;
@ -6,6 +6,7 @@ import static org.wildfly.common.Assert.assertFalse;
import static org.wildfly.common.Assert.assertTrue;
import java.awt.geom.Rectangle2D;
import java.util.Collections;
import java.util.List;
import java.util.Set;
@ -19,19 +20,20 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.CustomEntityCreationAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
import com.iqser.red.service.redaction.v1.server.redaction.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
import lombok.SneakyThrows;
public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
public class CustomEntityCreationAdapterTest extends BuildDocumentIntegrationTest {
@Autowired
private EntityEnrichmentService entityEnrichmentService;
@ -48,7 +50,7 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
/** Initialises the Mockito-annotated members of this test and stubs the colour lookup for the author dictionary with {0, 0, 0}. */
@BeforeEach
public void stubMethods() {

    MockitoAnnotations.openMocks(this);

    float[] authorColor = {0f, 0f, 0f};
    when(dictionaryService.getColor(DICTIONARY_AUTHOR, TEST_DOSSIER_TEMPLATE_ID)).thenReturn(authorColor);
}
@ -61,10 +63,10 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
Document document = buildGraph("files/new/VV-919901.pdf");
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
List<RedactionEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList();
List<TextEntity> tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList();
assertFalse(tempEntities.isEmpty());
var tempEntity = tempEntities.get(0);
List<Rectangle> positions = tempEntity.getRedactionPositionsPerPage()
List<Rectangle> positions = tempEntity.getPositionsOnPagePerPage()
.stream()
.flatMap(redactionPosition -> redactionPosition.getRectanglePerLine()
.stream()
@ -86,9 +88,8 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
tempEntity.removeFromGraph();
assertTrue(document.getEntities().isEmpty());
List<EntityIdentifier> notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry),
document);
assertTrue(notFoundEntityIdentifiers.isEmpty());
List<ManualEntity> notFoundManualEntities = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), document);
assertTrue(notFoundManualEntities.isEmpty());
assertEquals(1, document.getEntities().size());
}
@ -115,12 +116,14 @@ public class ManualRedactionEntryTest extends BuildDocumentIntegrationTest {
assertTrue(document.getEntities().isEmpty());
List<EntityIdentifier> notFoundEntityIdentifiers = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry),
document);
assertEquals(1, notFoundEntityIdentifiers.size());
List<ManualEntity> notFoundManualEntities = customEntityCreationAdapter.createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set.of(manualRedactionEntry), document);
assertEquals(1, notFoundManualEntities.size());
assertTrue(document.getEntities().isEmpty());
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundEntityIdentifiers);
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document,
TEST_DOSSIER_TEMPLATE_ID,
notFoundManualEntities,
Collections.emptyMap());
assertEquals(1, redactionLogEntries.size());
assertEquals(value, redactionLogEntries.get(0).getValue());

View File

@ -0,0 +1,323 @@
package com.iqser.red.service.redaction.v1.server.manualchanges;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.Mockito.when;
import static org.wildfly.common.Assert.assertTrue;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Paths;
import java.time.OffsetDateTime;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.AbstractRedactionIntegrationTest;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(ManualChangesEnd2EndTest.TestConfiguration.class)
public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules.drl");
@Autowired
private EntityEnrichmentService entityEnrichmentService;
private EntityCreationService entityCreationService;
/**
 * Spring configuration used only by this test class.
 *
 * <p>Auto-configuration runs without {@link RabbitAutoConfiguration}, the layout parser configuration
 * is imported, and {@link StorageAutoConfiguration} is filtered out of the component scan so that the
 * storage bean declared below is used instead.
 */
@Configuration
@Import(LayoutParsingServiceProcessorConfiguration.class)
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
public static class TestConfiguration {

    /**
     * Primary {@link StorageService} for the test context.
     * NOTE(review): the bean is named "inmemoryStorage" but returns a {@link FileSystemBackedStorageService}.
     */
    @Primary
    @Bean
    public StorageService inmemoryStorage() {

        final StorageService storage = new FileSystemBackedStorageService();
        return storage;
    }

}
/** Creates a fresh {@link EntityCreationService} before each test, wired by hand with the autowired {@link EntityEnrichmentService}. */
@BeforeEach
public void createServices() {

    this.entityCreationService = new EntityCreationService(this.entityEnrichmentService);
}
/**
 * Prepares the mocked rules and dictionary clients before each test.
 *
 * <p>Sets the tenant id, stubs the rules client with the rules loaded from the class path, loads the
 * dictionary, type and NER test data, and stubs the dictionary client (version, types per dossier
 * template, types per dossier, colours).
 */
@BeforeEach
public void stubClients() {

    TenantContext.setTenantId("redaction");

    when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis());
    when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));

    loadDictionaryForTest();
    loadTypeForTest();
    loadNerForTest();

    // A single stubbing is sufficient: the stubbed answer is returned for every matching call.
    when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
    when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());

    // NOTE(review): TEST_DOSSIER_ID is passed as dossierTemplateId while the id is built from
    // TEST_DOSSIER_TEMPLATE_ID - confirm this mix is intended.
    Type dossierRedactionType = Type.builder()
            .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .type(DOSSIER_REDACTIONS_INDICATOR)
            .dossierTemplateId(TEST_DOSSIER_ID)
            .hexColor("#ffe187")
            .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
            .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
            .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
            .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
            .build();
    when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(dossierRedactionType));

    mockDictionaryCalls(null);

    when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
}
/**
 * Resizes one "Desiree" redaction so that it spans a whole sentence and checks the redaction log
 * after re-analysis.
 *
 * <p>Before the resize the log holds two "Desiree" and two "Melanie" entries. Afterwards the resized
 * entry must keep its id, carry the sentence as value and end with a CHANGED change; exactly one
 * "Desiree" entry and exactly one "Melanie" entry whose last change is not a removal must remain.
 * The annotated PDF is additionally written to the temporary directory.
 */
@Test
@SneakyThrows
public void testManualResizeRedactionRemovesContainedEntities() {

    String filePath = "files/new/crafted document.pdf";
    AnalyzeRequest request = uploadFileToStorage(filePath);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    analyzeService.analyze(request);

    String testEntityValue1 = "Desiree";
    String testEntityValue2 = "Melanie";
    RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
    assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
    assertEquals(2, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count());

    // Locate the sentence in the stored document graph to obtain the positions for the resize.
    Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
    String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum.";
    TextEntity expandedEntity = entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().orElseThrow();

    // The "Desiree" entry with the highest start offset is the one being resized.
    String idToResize = redactionLog.getRedactionLogEntry()
            .stream()
            .filter(entry -> entry.getValue().equals(testEntityValue1))
            .max(Comparator.comparingInt(RedactionLogEntry::getStartOffset))
            .orElseThrow()
            .getId();

    // NOTE(review): page number 3 is hard-coded - presumably the page of the sentence in the crafted document; verify.
    List<Rectangle> resizedPositions = expandedEntity.getPositionsOnPagePerPage()
            .get(0)
            .getRectanglePerLine()
            .stream()
            .map(rectangle2D -> toAnnotationRectangle(rectangle2D, 3))
            .toList();
    ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
            .annotationId(idToResize)
            .value(expandedEntityKeyword)
            .positions(resizedPositions)
            .status(AnnotationStatus.APPROVED)
            .build();
    ManualRedactions manualRedactions = new ManualRedactions();
    manualRedactions.getResizeRedactions().add(manualResizeRedaction);
    request.setManualRedactions(manualRedactions);

    analyzeService.reanalyze(request);
    redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    // Write the annotated document to the temporary directory for manual inspection.
    String annotatedFileName = Paths.get(filePath).getFileName().toString().replace(".pdf", "_annotated2.pdf");
    File tmpFile = Paths.get(OsUtils.getTemporaryDirectory(), annotatedFileName).toFile();
    AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
    try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) {
        fileOutputStream.write(annotateResponse.getDocument());
    }

    RedactionLogEntry resizedEntry = redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().orElseThrow();
    // assertEquals reports the actual change type when the check fails.
    assertEquals(ChangeType.CHANGED, resizedEntry.getChanges().get(resizedEntry.getChanges().size() - 1).getType());
    assertEquals(idToResize, resizedEntry.getId());
    assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count());
    assertEquals(1, redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.lastChangeIsRemoved()).count());
}
/**
 * Builds a persistence-API rectangle on {@code pageNumber} from an AWT rectangle.
 *
 * <p>The constructor receives, in this order: the rectangle's max X, its max Y minus its height,
 * its width, its negated height, and the page number. All values are narrowed to {@code float}
 * before any arithmetic.
 *
 * @param rectangle2D source rectangle
 * @param pageNumber  page number stored in the result
 * @return the converted rectangle
 */
private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) {

    float height = (float) rectangle2D.getHeight();
    float maxX = (float) rectangle2D.getMaxX();
    float maxYMinusHeight = (float) rectangle2D.getMaxY() - height;
    float width = (float) rectangle2D.getWidth();

    return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle(maxX, maxYMinusHeight, width, -height, pageNumber);
}
/**
 * Smoke test for the manual-redaction flow on a single-table document.
 *
 * <p>Analyses the file with a declined id removal, a force redaction and comments, then re-analyses
 * with an added manual entry, an approved id removal and a legal basis change, and finally annotates
 * the document and writes it to the temporary directory. The method contains no assertions; it
 * passes when none of these steps throws.
 *
 * @throws IOException if the annotated document cannot be written
 */
@Test
public void testManualRedaction() throws IOException {

    System.out.println("testManualRedaction");
    long start = System.currentTimeMillis();
    String pdfFile = "files/Minimal Examples/Single Table.pdf";

    final String removedAnnotationId = "5b940b2cb401ed9f5be6fc24f6e77bcf";
    final String forcedAnnotationId = "675eba69b0c2917de55462c817adaa05";
    final String fileId = "fileId";

    ManualRedactions manualRedactions = new ManualRedactions();
    String manualAddId = UUID.randomUUID().toString();
    Comment comment = Comment.builder().date(OffsetDateTime.now()).user("TEST_USER").text("This is a comment test").build();

    // First pass: the removal is only declined, the force redaction is approved.
    IdRemoval declinedRemoval = IdRemoval.builder().annotationId(removedAnnotationId).fileId(fileId).status(AnnotationStatus.DECLINED).build();
    manualRedactions.setIdsToRemove(Set.of(declinedRemoval));

    ManualForceRedaction forceRedaction = ManualForceRedaction.builder()
            .annotationId(forcedAnnotationId)
            .fileId(fileId)
            .legalBasis("Something")
            .status(AnnotationStatus.APPROVED)
            .build();
    manualRedactions.setForceRedactions(Set.of(forceRedaction));

    for (String commentedId : List.of("e5be0f1d941bbb92a068e198648d06c4", "0836727c3508a0b2ea271da69c04cc2f", manualAddId)) {
        manualRedactions.getComments().put(commentedId, List.of(comment));
    }

    Rectangle firstPosition = Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build();
    Rectangle secondPosition = Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build();

    ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
    manualRedactionEntry.setAnnotationId(manualAddId);
    manualRedactionEntry.setFileId(fileId);
    manualRedactionEntry.setStatus(AnnotationStatus.REQUESTED);
    manualRedactionEntry.setType("name");
    manualRedactionEntry.setValue("O'Loughlin C.K.");
    manualRedactionEntry.setReason("Manual Redaction");
    manualRedactionEntry.setPositions(List.of(firstPosition, secondPosition));

    AnalyzeRequest request = uploadFileToStorage(pdfFile);
    request.setManualRedactions(manualRedactions);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    AnalyzeResult result = analyzeService.analyze(request);

    // Second pass: add the manual entry, approve the removal and change the legal basis, then re-analyse.
    manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
    IdRemoval approvedRemoval = IdRemoval.builder().annotationId(removedAnnotationId).fileId(fileId).status(AnnotationStatus.APPROVED).build();
    manualRedactions.setIdsToRemove(Set.of(approvedRemoval));
    ManualLegalBasisChange legalBasisChange = ManualLegalBasisChange.builder()
            .annotationId(forcedAnnotationId)
            .fileId(fileId)
            .legalBasis("Manual Legal Basis Change")
            .status(AnnotationStatus.APPROVED)
            .build();
    manualRedactions.setLegalBasisChanges(Set.of(legalBasisChange));

    analyzeService.reanalyze(request);

    // The log is fetched but not inspected; the call is kept so a missing log still fails the test.
    redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    AnnotateRequest annotateRequest = AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build();
    AnnotateResponse annotateResponse = annotationService.annotate(annotateRequest);

    try (FileOutputStream annotatedOut = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
        annotatedOut.write(annotateResponse.getDocument());
    }

    long end = System.currentTimeMillis();

    System.out.println("duration: " + (end - start));
    System.out.println("numberOfPages: " + result.getNumberOfPages());
}
/**
 * Recategorizes the "Oxford University Press" entry from "published_information" to "vertebrate"
 * and checks the redaction log after re-analysis.
 *
 * <p>Before the change, the CBI_author entry "Asya Lyon" must have matched rule CBI.3.2 with reason
 * "No vertebrate found". After re-analysis no "published_information" entry with the recategorized
 * value may remain, and the "vertebrate" entry must carry exactly one manual change.
 *
 * <p>NOTE(review): the test name refers to a change of the CBI_author entry, but that entry is not
 * re-checked after re-analysis - confirm whether an assertion is missing.
 */
@Test
public void testReCategorizeToVertebrateChangesCbiAuthor() {

    final String publisherValue = "Oxford University Press";

    String filePath = "files/new/crafted document.pdf";
    AnalyzeRequest request = uploadFileToStorage(filePath);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    analyzeService.analyze(request);

    RedactionLog initialLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    RedactionLogEntry publisherEntry = initialLog.getRedactionLogEntry()
            .stream()
            .filter(entry -> entry.getType().equals("published_information") && entry.getValue().equals(publisherValue))
            .findFirst()
            .orElseThrow();

    RedactionLogEntry authorEntry = initialLog.getRedactionLogEntry()
            .stream()
            .filter(entry -> entry.getType().equals("CBI_author") && entry.getValue().equals("Asya Lyon"))
            .findFirst()
            .orElseThrow();

    assertEquals("CBI.3.2", authorEntry.getMatchedRule());
    assertEquals("No vertebrate found", authorEntry.getReason());

    ManualImageRecategorization recategorization = ManualImageRecategorization.builder()
            .requestDate(OffsetDateTime.now())
            .status(AnnotationStatus.APPROVED)
            .type("vertebrate")
            .annotationId(publisherEntry.getId())
            .fileId(TEST_FILE_ID)
            .build();

    request.setManualRedactions(new ManualRedactions());
    request.getManualRedactions().setImageRecategorization(Set.of(recategorization));
    analyzeService.reanalyze(request);

    RedactionLog reanalyzedLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    // The old categorization must be gone ...
    boolean stillPublishedInformation = reanalyzedLog.getRedactionLogEntry()
            .stream()
            .filter(entry -> entry.getType().equals("published_information"))
            .anyMatch(entry -> entry.getValue().equals(publisherValue));
    assertFalse(stillPublishedInformation);

    // ... and the new one must exist with exactly one manual change attached.
    RedactionLogEntry recategorizedEntry = reanalyzedLog.getRedactionLogEntry()
            .stream()
            .filter(entry -> entry.getType().equals("vertebrate") && entry.getValue().equals(publisherValue))
            .findFirst()
            .orElseThrow();
    assertEquals(1, recategorizedEntry.getManualChanges().size());
}
}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
package com.iqser.red.service.redaction.v1.server.manualchanges;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -31,24 +31,25 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
@Import(ManualResizeRedactionIntegrationTest.TestConfiguration.class)
public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrationTest {
@Import(ManualChangesIntegrationTest.TestConfiguration.class)
public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
private static final String RULES = "drools/manual_redaction_rules.drl";
@Autowired
private EntityEnrichmentService entityEnrichmentService;
private EntityCreationService entityCreationService;
private ManualRedactionApplicationService manualRedactionApplicationService;
private ManualChangesApplicationService manualChangesApplicationService;
@Qualifier("kieContainer")
@Autowired
@ -79,7 +80,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
public void createServices() {
// Builds the services used by the tests by hand: the entity creation service wraps the autowired
// enrichment service, and the manual-changes service is constructed on top of the creation service.
entityCreationService = new EntityCreationService(entityEnrichmentService);
manualRedactionApplicationService = new ManualRedactionApplicationService(entityCreationService);
manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService);
}
@ -87,23 +88,23 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
public void manualResizeRedactionTest() {
Document document = buildGraph("files/new/crafted document");
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<RedactionEntity> biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document)
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document)
.collect(Collectors.toUnmodifiableSet());
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
RedactionEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder()
.annotationId(initialId)
.value(biggerEntity.getValue())
.positions(toAnnotationRectangles(biggerEntity.getRedactionPositionsPerPage().get(0)))
.positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage().get(0)))
.status(AnnotationStatus.APPROVED)
.build();
KieSession kieSession = kieContainer.newKieSession();
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
kieSession.insert(document);
document.streamAllSubNodes().forEach(kieSession::insert);
kieSession.insert(entity);
@ -111,14 +112,14 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
kieSession.fireAllRules();
kieSession.dispose();
assertEquals(biggerEntity.getBoundary(), entity.getBoundary());
assertEquals(biggerEntity.getTextRange(), entity.getTextRange());
assertEquals(biggerEntity.getDeepestFullyContainingNode(), entity.getDeepestFullyContainingNode());
assertEquals(biggerEntity.getIntersectingNodes(), entity.getIntersectingNodes());
assertEquals(biggerEntity.getPages(), entity.getPages());
assertEquals(biggerEntity.getValue(), entity.getValue());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertRectanglesAlmostEqual(biggerEntity.getRedactionPositionsPerPage().get(0).getRectanglePerLine(), entity.getRedactionPositionsPerPage().get(0).getRectanglePerLine());
assertTrue(entity.isResized());
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage().get(0).getRectanglePerLine(), entity.getPositionsOnPagePerPage().get(0).getRectanglePerLine());
assertTrue(entity.resized());
}
@ -126,11 +127,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
public void manualForceRedactionTest() {
Document document = buildGraph("files/new/crafted document");
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder()
.annotationId(initialId)
.status(AnnotationStatus.APPROVED)
@ -139,7 +140,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
.build();
KieSession kieSession = kieContainer.newKieSession();
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
kieSession.insert(entity);
kieSession.insert(manualForceRedaction);
kieSession.insert(document);
@ -151,11 +152,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
assertFalse(entity.getIntersectingNodes().isEmpty());
assertEquals(1, entity.getPages().size());
assertEquals("David Ksenia", entity.getValue());
assertEquals("Something", entity.getMatchedRule().getLegalBasis());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertFalse(entity.isRemoved());
assertTrue(entity.isSkipRemoveEntitiesContainedInLarger());
assertTrue(entity.isApplied());
assertEquals("Something", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertFalse(entity.removed());
assertTrue(entity.hasManualChanges());
assertTrue(entity.applied());
}
@ -163,15 +164,15 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
public void manualIDRemovalTest() {
Document document = buildGraph("files/new/crafted document");
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build();
KieSession kieSession = kieContainer.newKieSession();
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
kieSession.insert(document);
document.streamAllSubNodes().forEach(kieSession::insert);
kieSession.insert(entity);
@ -180,8 +181,8 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
kieSession.dispose();
assertEquals("David Ksenia", entity.getValue());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertTrue(entity.isIgnored());
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertTrue(entity.ignored());
}
@ -189,11 +190,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
public void manualIDRemovalButAlsoForceRedactionTest() {
Document document = buildGraph("files/new/crafted document");
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build();
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder()
.annotationId(initialId)
@ -203,7 +204,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
.build();
KieSession kieSession = kieContainer.newKieSession();
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
kieSession.insert(document);
document.streamAllSubNodes().forEach(kieSession::insert);
kieSession.insert(entity);
@ -216,9 +217,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
assertFalse(entity.getIntersectingNodes().isEmpty());
assertEquals(1, entity.getPages().size());
assertEquals("David Ksenia", entity.getValue());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertFalse(entity.isRemoved());
assertFalse(entity.isIgnored());
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertFalse(entity.removed());
assertFalse(entity.ignored());
}
@ -226,15 +227,15 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
public void manualIDRemovalNotApprovedTest() {
Document document = buildGraph("files/new/crafted document");
Set<RedactionEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
Set<TextEntity> entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet());
RedactionEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get();
String initialId = entity.getRedactionPositionsPerPage().get(0).getId();
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.REQUESTED).build();
KieSession kieSession = kieContainer.newKieSession();
kieSession.setGlobal("manualRedactionApplicationService", manualRedactionApplicationService);
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
kieSession.insert(entity);
kieSession.insert(idRemoval);
kieSession.insert(document);
@ -246,8 +247,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
assertFalse(entity.getIntersectingNodes().isEmpty());
assertEquals(1, entity.getPages().size());
assertEquals("David Ksenia", entity.getValue());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertFalse(entity.isRemoved());
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
assertFalse(entity.ignored());
assertFalse(entity.removed());
}
@ -271,9 +273,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
}
private static List<Rectangle> toAnnotationRectangles(RedactionPosition redactionPositions) {
private static List<Rectangle> toAnnotationRectangles(PositionOnPage positionsOnPage) {
return redactionPositions.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, redactionPositions.getPage().getNumber())).toList();
return positionsOnPage.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())).toList();
}

View File

@ -0,0 +1,134 @@
package com.iqser.red.service.redaction.v1.server.manualchanges;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.time.OffsetDateTime;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
/**
 * Exercises the manual-change overrides of a {@link TextEntity} directly through
 * {@code entity.getManualOverwrite().addChange(...)}, without running any rules.
 * The document graph is built by the inherited {@code buildGraphNoImages} helper.
 */
public class ManualChangesUnitTest extends BuildDocumentIntegrationTest {
@Autowired
private EntityEnrichmentService entityEnrichmentService;
private EntityCreationService entityCreationService;
/**
 * Creates a fresh {@link EntityCreationService} around the injected enrichment service before each test.
 */
@BeforeEach
public void createServices() {
entityCreationService = new EntityCreationService(entityEnrichmentService);
}
/**
 * Adds a sequence of manual changes (removal, force redaction, legal basis change,
 * recategorization) to a single applied entity and checks, after every step, the
 * resulting state flags, the generated reason text and the overridden values.
 */
@Test
public void testBasicOverrides() {
OffsetDateTime start = OffsetDateTime.now();
String reason = "whatever";
Document document = buildGraphNoImages("files/new/crafted document.pdf");
// Every entity found for the search string is applied with "T.0.0" and the reason while the stream is consumed.
List<TextEntity> entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document).peek(e -> e.apply("T.0.0", reason)).toList();
assertFalse(entities.isEmpty());
TextEntity entity = entities.get(0);
// Baseline: applied and active, with no manual state set yet.
assertTrue(entity.active());
assertTrue(entity.applied());
assertFalse(entity.removed());
assertFalse(entity.resized());
assertFalse(entity.ignored());
// apply(...) was called without a legal basis; the test expects "n-a" here (presumably the default - confirm in MatchedRule).
assertEquals("n-a", entity.getMatchedRule().getLegalBasis());
String annotationId = entity.getPositionsOnPagePerPage().get(0).getId();
// Step 1: an approved removal at "start" is expected to flip the entity to ignored / not applied.
IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).status(AnnotationStatus.APPROVED).build();
entity.getManualOverwrite().addChange(removal);
assertTrue(entity.ignored());
assertFalse(entity.applied());
assertEquals(reason + ", removed by manual override", entity.buildReasonWithManualChangeDescriptions());
// Step 2: a force redaction requested one second later is expected to re-apply the entity and supply the legal basis.
ManualForceRedaction forceRedaction = ManualForceRedaction.builder()
.requestDate(start.plusSeconds(1))
.fileId(TEST_FILE_ID)
.annotationId(annotationId)
.legalBasis("coolio")
.status(AnnotationStatus.APPROVED)
.build();
entity.getManualOverwrite().addChange(forceRedaction);
assertTrue(entity.applied());
assertFalse(entity.ignored());
assertFalse(entity.removed());
assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions());
assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
// Step 3: a second removal at start + 3s is expected to make the entity ignored again.
IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).status(AnnotationStatus.APPROVED).build();
entity.getManualOverwrite().addChange(removal2);
assertTrue(entity.ignored());
assertFalse(entity.applied());
assertEquals(reason + ", removed by manual override, forced by manual override, removed by manual override", entity.buildReasonWithManualChangeDescriptions());
// Step 4: a force redaction added last but dated start + 2s (after the first force, before removal2).
// The assertions expect the entity to stay ignored and the description to appear before the final
// removal, i.e. changes are expected to be ordered by requestDate rather than by insertion order.
ManualForceRedaction forceRedaction2 = ManualForceRedaction.builder()
.requestDate(start.plusSeconds(2))
.fileId(TEST_FILE_ID)
.annotationId(annotationId)
.legalBasis("coolio")
.status(AnnotationStatus.APPROVED)
.build();
entity.getManualOverwrite().addChange(forceRedaction2);
assertTrue(entity.ignored());
assertFalse(entity.applied());
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override",
entity.buildReasonWithManualChangeDescriptions());
// Step 5: a legal basis change at start + 4s is expected to override legal basis, section and value
// while leaving the ignored / not-applied state untouched.
String legalBasis = "Yeah";
String section = "Some random section!";
String value = "Some random value!";
ManualLegalBasisChange legalBasisChange = ManualLegalBasisChange.builder()
.legalBasis(legalBasis)
.annotationId(annotationId)
.requestDate(start.plusSeconds(4))
.section(section)
.status(AnnotationStatus.APPROVED)
.user("peter")
.value(value)
.build();
entity.getManualOverwrite().addChange(legalBasisChange);
assertTrue(entity.ignored());
assertFalse(entity.applied());
assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override, legal basis was manually changed",
entity.buildReasonWithManualChangeDescriptions());
// Each override is read through its Optional, falling back to the entity's own value when absent.
assertEquals(value, entity.getManualOverwrite().getValue().orElse(entity.getValue()));
assertEquals(legalBasis, entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis()));
assertEquals(section, entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString()));
// Step 6: a recategorization at start + 5s is expected to set the recategorized flag and override the type.
ManualImageRecategorization imageRecategorizationRequest = ManualImageRecategorization.builder()
.type("type")
.requestDate(start.plusSeconds(5))
.annotationId(annotationId)
.status(AnnotationStatus.APPROVED)
.build();
entity.getManualOverwrite().addChange(imageRecategorizationRequest);
assertTrue(entity.getManualOverwrite().getRecategorized().isPresent());
assertTrue(entity.getManualOverwrite().getRecategorized().get());
assertEquals("type", entity.getManualOverwrite().getType().orElse(entity.getType()));
}
}

View File

@ -5,9 +5,6 @@ import java.util.List;
import java.util.Set;
import org.junit.jupiter.api.Test;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.core.io.ClassPathResource;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -15,7 +12,6 @@ import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.MessageType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@ -25,9 +21,6 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
public static final String FILE_NAME = "test-file";
@Autowired
private AnnotationService annotationService;
@Test
@SneakyThrows
@ -57,26 +50,15 @@ public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest
.fileAttributes(List.of())
.build();
try {
var text = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".text.json").getInputStream();
var sectionText = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".section-grid.json").getInputStream();
redactionStorageService.storeObject("dossierId", "fileId", FileType.TEXT, text);
redactionStorageService.storeObject("dossierId", "fileId", FileType.SECTION_GRID, sectionText);
} catch (Exception e) {
log.info("No text file provided, Performing Structure analysis");
ar.setMessageType(MessageType.STRUCTURE_ANALYSE);
redactionMessageReceiver.receiveAnalyzeRequest(ar, false);
}
try {
var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream();
} catch (Exception e) {
log.info("No redaction log provided, Performing full analysis");
ar.setMessageType(MessageType.ANALYSE);
redactionMessageReceiver.receiveAnalyzeRequest(ar, false);
}
// try {
// var redactionLog = new ClassPathResource(BASE_DIR + "data/" + FILE_NAME + ".redaction-log.json").getInputStream();
// } catch (Exception e) {
// log.info("No redaction log provided, Performing full analysis");
//
// ar.setMessageType(MessageType.ANALYSE);
// redactionMessageReceiver.receiveAnalyzeRequest(ar, false);
// }
simulateIncrement(List.of("Desiree"), "PII", 3L);
ar.setMessageType(MessageType.REANALYSE);

View File

@ -22,10 +22,10 @@ import org.springframework.core.io.ClassPathResource;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
@ -66,7 +66,7 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
Document document = buildGraphNoImages(filePath);
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(parseNerEntities(nerEntitiesFilePath), document);
assertFalse(entityRecognitionEntities.isEmpty());
assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.boundary().start() < entity.boundary().end()));
assertTrue(entityRecognitionEntities.stream().allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
ClassPathResource resource = new ClassPathResource(filePath);
try (PDDocument pdDocument = Loader.loadPDF(resource.getInputStream())) {
@ -75,8 +75,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
.getNerEntityList()
.stream()
.filter(e -> !e.type().equals("CBI_author"));
List<RedactionEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
List<TextEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
.map(e -> entityCreationService.byBoundary(e.textRange(), e.type(), EntityType.ENTITY, document))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
@ -107,23 +107,23 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
log.info("Parsed NerEntitiesModel");
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
log.info("Validated and mapped");
List<Boundary> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList();
List<TextRange> nerEntityBoundaries = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).toList();
log.info("Combined to CBI_address");
List<RedactionEntity> cbiAddressEntities = nerEntityBoundaries.stream()
List<TextEntity> cbiAddressEntities = nerEntityBoundaries.stream()
.map(b -> entityCreationService.byBoundary(b, "CBI_address", EntityType.RECOMMENDATION, document))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
assertFalse(cbiAddressEntities.isEmpty());
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getBoundary().start() < entity.getBoundary().end()));
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getTextRange().start() < entity.getTextRange().end()));
ClassPathResource resource = new ClassPathResource(filePath);
try (PDDocument pdDocument = Loader.loadPDF(resource.getInputStream())) {
List<RedactionEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
List<TextEntity> validatedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document)
.getNerEntityList()
.stream()
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
.map(e -> entityCreationService.byBoundary(e.textRange(), e.type(), EntityType.ENTITY, document))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
@ -153,24 +153,24 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
}
private List<Rectangle2D> getPositionsFromEntities(Stream<RedactionEntity> entities) {
private List<Rectangle2D> getPositionsFromEntities(Stream<TextEntity> entities) {
return entities.map(RedactionEntity::getRedactionPositionsPerPage)
return entities.map(TextEntity::getPositionsOnPagePerPage)
.flatMap(Collection::stream)
.map(RedactionPosition::getRectanglePerLine)
.map(PositionOnPage::getRectanglePerLine)
.flatMap(Collection::stream)
.toList();
}
private List<Rectangle2D> getPositionsFromEntityOfType(String type, List<RedactionEntity> entities) {
private List<Rectangle2D> getPositionsFromEntityOfType(String type, List<TextEntity> entities) {
return getPositionsFromEntities(entities.stream().filter(e -> e.getType().equals(type)));
}
private List<Rectangle2D> getPositionsFromEntityNotOfType(List<String> types, List<RedactionEntity> entities) {
private List<Rectangle2D> getPositionsFromEntityNotOfType(List<String> types, List<TextEntity> entities) {
return getPositionsFromEntities(entities.stream().filter(e -> types.stream().noneMatch(type -> e.getType().equals(type))));

View File

@ -43,6 +43,19 @@ class DroolsExecutionServiceTest {
assertTrue(droolsSyntaxValidation.isCompiled());
}
/**
 * Checks that the rule set in {@code drools/all_rules.drl} is reported as compiled
 * by {@code DroolsExecutionService.testRules}.
 */
@Test
@SneakyThrows
void testAllRules() {

    DroolsExecutionService droolsExecutionService = new DroolsExecutionService(rulesClient, entityEnrichmentService, new DroolsSyntaxValidationFactory());
    var rulesFile = new ClassPathResource("drools/all_rules.drl");
    String rulesString;
    // Close the classpath stream deterministically and decode as UTF-8 explicitly, so the
    // rule text does not depend on the platform default charset of the machine running the test.
    try (var rulesStream = rulesFile.getInputStream()) {
        rulesString = new String(rulesStream.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
    }
    DroolsSyntaxValidation droolsSyntaxValidation = droolsExecutionService.testRules(rulesString);
    assertTrue(droolsSyntaxValidation.isCompiled());
}
@Test
@SneakyThrows

View File

@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global ManualChangesApplicationService manualChangesApplicationService
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
@ -77,7 +78,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
then
$entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -85,7 +86,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
then
$entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -95,7 +96,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
then
$entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
end
@ -103,7 +104,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
then
$entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -112,9 +113,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
// Rule unit: CBI.2
rule "CBI.2.0: Don't redact genitive CBI_author"
when
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), isApplied())
$entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), applied())
then
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document)
.ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found"));
end
@ -299,7 +300,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
rule "PII.0.0: Redact all PII (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -307,7 +308,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
// PII.0.1: fires when a FileAttribute labelled "Vertebrate Study" has the value "yes"
// (compared after lower-casing) and an entity of type "PII" with dictionaryEntry set exists.
// apply(...) is then called on the matched entity with the Article 39(e)(2) text.
rule "PII.0.1: Redact all PII (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -453,9 +454,10 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
// ETC.5.0: fires for every "dossier_redaction" entity when no FileAttribute labelled
// "Confidentiality" has the exact value "confidential" (this comparison is case-sensitive).
// ignore(...) is called on the entity, then the entity and each of its intersecting nodes
// are reported to the rule engine via update(...).
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
when
not FileAttribute(label == "Confidentiality", value == "confidential")
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
$dossierRedaction: TextEntity(type == "dossier_redaction")
then
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
update($dossierRedaction);
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node));
end
@ -489,10 +491,10 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeResized: TextEntity(matchesAnnotationId($id))
then
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
@ -503,11 +505,10 @@ rule "MAN.0.0: Apply manual resize redaction"
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
@ -516,11 +517,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$imageEntityToBeRemoved: Image($id == id)
then
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($imageEntityToBeRemoved);
retract($idRemoval);
update($imageEntityToBeRemoved.getParent());
@ -532,29 +532,27 @@ rule "MAN.2.0: Apply force redaction"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToForce: TextEntity(matchesAnnotationId($id))
then
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
$entityToForce.getManualOverwrite().addChange($force);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
end
// Rule unit: MAN.3
// MAN.3.0: applies an APPROVED ManualImageRecategorization to the element whose id matches
// the recategorization's annotationId.
// NOTE(review): this span shows two variants of the rule side by side - one bound to an Image
// ($imageToBeRecategorized, using setImageType) and one bound to a TextEntity
// ($entityToBeRecategorized, using manualChangesApplicationService.recategorize).
rule "MAN.3.0: Apply image recategorization"
rule "MAN.3.0: Apply entity recategorization"
salience 128
when
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$imageToBeRecategorized: Image($id == id)
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
then
// Image variant: set the new image type, then notify the engine about the image and its parent.
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
update($imageToBeRecategorized);
update($imageToBeRecategorized.getParent());
// Entity variant: notify the engine about every intersecting node, delegate the change to the service,
// and remove the consumed recategorization fact.
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
retract($recategorization);
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
retract($entityToBeRecategorized);
end
@ -564,8 +562,8 @@ rule "MAN.3.0: Apply image recategorization"
rule "X.0.0: remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$larger: TextEntity($type: type, $entityType: entityType, active())
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
then
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
retract($contained);
@ -576,10 +574,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
rule "X.1.0: merge intersecting Entities of same type"
salience 64
when
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
then
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
$first.remove("X.1.0", "merge intersecting Entities of same type");
$second.remove("X.1.0", "merge intersecting Entities of same type");
retract($first);
@ -592,8 +590,8 @@ rule "X.1.0: merge intersecting Entities of same type"
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
salience 64
when
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
then
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
@ -605,8 +603,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
salience 64
when
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
retract($recommendation);
@ -617,8 +615,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
salience 256
when
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$entity.addEngines($recommendation.getEngines());
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
@ -630,8 +628,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
salience 256
when
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity(entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
retract($recommendation);
@ -642,8 +640,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
salience 32
when
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
then
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");

View File

@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global ManualChangesApplicationService manualChangesApplicationService
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
@ -90,7 +91,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
// CBI.0.0: fires when no FileAttribute labelled "Vertebrate Study" has the value "yes"
// (compared after lower-casing) and an entity of type "CBI_author" with dictionaryEntry set exists.
// apply(...) is then called on the matched entity with the Article 39(e)(3) text.
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -98,7 +99,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
// CBI.0.1: fires when a FileAttribute labelled "Vertebrate Study" has the value "yes"
// (compared after lower-casing) and an entity of type "CBI_author" with dictionaryEntry set exists.
// apply(...) is then called on the matched entity with the Article 39(e)(2) text.
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -108,7 +109,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
// CBI.1.0: fires when no FileAttribute labelled "Vertebrate Study" has the value "yes"
// (compared after lower-casing) and an entity of type "CBI_address" with dictionaryEntry set exists.
// skip(...) is then called on the matched entity.
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
then
// Arguments: rule identifier, human-readable reason.
$entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
end
@ -116,7 +117,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
// CBI.1.1: fires when a FileAttribute labelled "Vertebrate Study" has the value "yes"
// (compared after lower-casing) and an entity of type "CBI_address" with dictionaryEntry set exists.
// apply(...) is then called on the matched entity.
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -125,9 +126,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
// Rule unit: CBI.2
// CBI.2.0: matches applied "CBI_author" entities whose textAfter matches an apostrophe-like
// character followed by "s" (a genitive form). For each match an entity of type
// EntityType.FALSE_POSITIVE is requested for the same range; if one is returned, skip(...) is called on it.
rule "CBI.2.0: Don't redact genitive CBI_author"
when
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), isApplied())
$entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), applied())
then
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document)
// The service result is optional; nothing happens when no entity is returned.
.ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found"));
end
@ -474,7 +475,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert
rule "CBI.13.0: Ignore CBI Address Recommendations"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION)
$entity: TextEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION)
then
$entity.ignore("CBI.13.0", "Ignore CBI Address Recommendations");
retract($entity)
@ -484,7 +485,7 @@ rule "CBI.13.0: Ignore CBI Address Recommendations"
// Rule unit: CBI.14
// CBI.14.0: matches "CBI_sponsor" entities whose textBefore contains the literal phrase
// "batches produced at" (case-sensitive String.contains) and calls apply(...) on them.
rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\""
when
$sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
$sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
@ -587,7 +588,7 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with
rule "CBI.18.0: Expand CBI_author entities with firstname initials"
no-loop true
when
$entityToExpand: RedactionEntity(type == "CBI_author",
$entityToExpand: TextEntity(type == "CBI_author",
value.matches("[^\\s]+"),
textAfter.startsWith(" "),
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
@ -595,7 +596,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
then
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
.ifPresent(expandedEntity -> {
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove("CBI.18.0", "Expand CBI_author entities with firstname initials");
retract($entityToExpand);
});
@ -605,11 +606,11 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
// Rule unit: CBI.19
rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
when
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
$entityToExpand: TextEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> {
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove("CBI.19.0", "Expand CBI_author entities with salutation prefix");
retract($entityToExpand);
});
@ -650,7 +651,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
// PII.0.0: fires when no FileAttribute labelled "Vertebrate Study" has the value "yes"
// (compared after lower-casing) and an entity of type "PII" with dictionaryEntry set exists.
// apply(...) is then called on the matched entity with the Article 39(e)(3) text.
rule "PII.0.0: Redact all PII (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -658,7 +659,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
// PII.0.1: fires when a FileAttribute labelled "Vertebrate Study" has the value "yes"
// (compared after lower-casing) and an entity of type "PII" with dictionaryEntry set exists.
// apply(...) is then called on the matched entity with the Article 39(e)(2) text.
rule "PII.0.1: Redact all PII (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -991,10 +992,10 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:"
// Rule unit: PII.12
// PII.12.0: matches "PII" entities whose textBefore matches a salutation
// (Mr, Mrs, Ms, Miss, Sir, Madam, Madame, Mme; optionally followed by whitespace and/or a dot).
// The same regex is handed to entityCreationService.byPrefixExpansionRegex; if an expanded
// entity is returned, the matched rules of the original entity are carried over to it.
rule "PII.12.0: Expand PII entities with salutation prefix"
when
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
$entityToExpand: TextEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
// The regex here must stay in sync with the one used in the condition above.
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()));
.ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()));
end
@ -1059,7 +1060,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
// Rule unit: ETC.4
// ETC.4.0: calls apply(...) on every entity of type "dossier_redaction"; the condition has no
// further constraints.
rule "ETC.4.0: Redact dossier dictionary entries"
when
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
$dossierRedaction: TextEntity(type == "dossier_redaction")
then
// Arguments: rule identifier, reason, and (presumably) the legal basis text - TODO confirm against apply(...).
$dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -1069,7 +1070,7 @@ rule "ETC.4.0: Redact dossier dictionary entries"
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
when
not FileAttribute(label == "Confidentiality", value == "confidential")
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
$dossierRedaction: TextEntity(type == "dossier_redaction")
then
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
update($dossierRedaction);
@ -1161,10 +1162,10 @@ rule "AI.2.0: add all NER Entities of any type except CBI_author"
rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeResized: TextEntity(matchesAnnotationId($id))
then
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
@ -1173,10 +1174,10 @@ rule "MAN.0.0: Apply manual resize redaction"
rule "MAN.0.1: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeResized: Image(id == $id)
then
manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($imageToBeResized);
update($imageToBeResized.getParent());
@ -1187,11 +1188,10 @@ rule "MAN.0.1: Apply manual resize redaction"
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
@ -1200,11 +1200,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$imageEntityToBeRemoved: Image($id == id)
then
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($imageEntityToBeRemoved);
retract($idRemoval);
update($imageEntityToBeRemoved.getParent());
@ -1216,13 +1215,10 @@ rule "MAN.2.0: Apply force redaction"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToForce: TextEntity(matchesAnnotationId($id))
then
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
$entityToForce.getManualOverwrite().addChange($force);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
end
@ -1231,29 +1227,60 @@ rule "MAN.2.1: Apply force redaction to images"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToForce: Image(id == $id)
then
$imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$imageToForce.setRemoved(false);
$imageToForce.setIgnored(false);
$imageToForce.getManualOverwrite().addChange($force);
update($imageToForce);
update($imageToForce.getParent());
end
// Rule unit: MAN.3
// MAN.3.0: applies an APPROVED ManualImageRecategorization to the TextEntity whose annotation id
// matches the recategorization's annotationId. The change itself is delegated to
// manualChangesApplicationService.recategorize; afterwards both the recategorization fact and
// the original entity are retracted.
// NOTE(review): two rule header lines and two $recategorization patterns appear here side by side
// (the "image" wording with the $imageType binding, and the "entity" wording without it).
rule "MAN.3.0: Apply image recategorization"
rule "MAN.3.0: Apply entity recategorization"
salience 128
when
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
then
// Notify the engine about every node the entity intersects before the entity is replaced.
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
// Remove the consumed fact so the rule does not match it again.
retract($recategorization);
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
retract($entityToBeRecategorized);
end
// MAN.3.1: applies an APPROVED ManualImageRecategorization to the Image whose id equals the
// recategorization's annotationId, notifies the engine about the image and its parent, and
// removes the consumed recategorization fact.
rule "MAN.3.1: Apply image recategorization"
salience 128
when
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeRecategorized: Image($id == id)
then
// NOTE(review): the setImageType line references $imageType, which is not bound by the pattern
// above; it appears to be the earlier form of the recategorize(...) call on the following line.
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
update($imageToBeRecategorized);
update($imageToBeRecategorized.getParent());
retract($recategorization);
end
// Rule unit: MAN.4
// MAN.4.0: for every APPROVED ManualLegalBasisChange whose annotationId equals the id of an
// Image, registers the change on that image's manual overwrite.
// NOTE(review): this rule neither calls update(...) on the image nor retract(...) on the change
// fact - confirm that other rules do not need to observe the changed legal basis.
rule "MAN.4.0: Apply legal basis change"
salience 128
when
$legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeChanged: Image($id == id)
then
$imageToBeChanged.getManualOverwrite().addChange($legalBasisChange);
end
// MAN.4.1: text-entity variant of the legal basis change rule. Every APPROVED
// ManualLegalBasisChange is paired with the TextEntity that reports a match for the change's
// annotation id, and the change is registered on that entity's manual overwrite.
rule "MAN.4.1: Apply legal basis change"
salience 128
when
$change: ManualLegalBasisChange(status == AnnotationStatus.APPROVED, $annotationId: annotationId)
$entity: TextEntity(matchesAnnotationId($annotationId))
then
$entity.getManualOverwrite().addChange($change);
end
//------------------------------------ Entity merging rules ------------------------------------
@ -1261,8 +1288,8 @@ rule "MAN.3.0: Apply image recategorization"
rule "X.0.0: remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$larger: TextEntity($type: type, $entityType: entityType, active())
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
then
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
retract($contained);
@ -1273,10 +1300,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
rule "X.1.0: merge intersecting Entities of same type"
salience 64
when
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
then
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
$first.remove("X.1.0", "merge intersecting Entities of same type");
$second.remove("X.1.0", "merge intersecting Entities of same type");
retract($first);
@ -1289,8 +1316,8 @@ rule "X.1.0: merge intersecting Entities of same type"
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
salience 64
when
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
then
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
@ -1302,8 +1329,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
salience 64
when
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
retract($recommendation);
@ -1314,8 +1341,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
salience 256
when
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$entity.addEngines($recommendation.getEngines());
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
@ -1327,8 +1354,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
salience 256
when
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity(entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
retract($recommendation);
@ -1339,8 +1366,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
salience 32
when
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
then
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");

View File

@ -34,22 +34,22 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global ManualChangesApplicationService manualRedactionApplicationService
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
@ -397,7 +397,7 @@ rule "DOC.8.1: Performing Laboratory (Name)"
$section: Section(containsString("PERFORMING LABORATORY:"))
then
nerEntities.streamEntitiesOfType("COUNTRY")
.filter(nerEntity -> $section.getBoundary().contains(nerEntity.boundary()))
.filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange()))
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section))
.forEach(entity -> {
entity.apply("DOC.8.2", "Performing Laboratory found", "n-a");
@ -1221,7 +1221,7 @@ rule "DOC.44.0: Results (Main Study)"
FileAttribute(label == "OECD Number", value == "429")
// Select sections whose headline contains "Results", has a text range shorter than 20,
// and mentions neither "Appendix" nor "Table".
$section: Section(
getHeadline().containsString("Results")
&& getHeadline().getBoundary().length() < 20
&& getHeadline().getTextRange().length() < 20
&& !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table"))
)
then
@ -1262,7 +1262,7 @@ rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
$entityToBeResized: TextEntity(matchesAnnotationId($id))
then
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
@ -1276,7 +1276,7 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
when
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.removeFromGraph();
retract($entityToBeRemoved);
@ -1298,7 +1298,7 @@ rule "MAN.2.0: Apply force redaction"
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
$entityToForce: TextEntity(matchesAnnotationId($id))
then
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
@ -1328,8 +1328,8 @@ rule "MAN.3.0: Apply image recategorization"
rule "X.0.0: remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
// Pair two active entities of identical type and entity type; the contained one must not have been resized.
$larger: TextEntity($type: type, $entityType: entityType, active())
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
then
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
retract($contained);

View File

@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global ManualChangesApplicationService manualChangesApplicationService
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
@ -58,14 +59,16 @@ query "getFileAttributes"
$fileAttribute: FileAttribute()
end
//------------------------------------ Manual redaction rules ------------------------------------
// Rule unit: MAN.0
rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeResized: TextEntity(matchesAnnotationId($id))
then
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
@ -74,10 +77,10 @@ rule "MAN.0.0: Apply manual resize redaction"
rule "MAN.0.1: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeResized: Image(id == $id)
then
manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($imageToBeResized);
update($imageToBeResized.getParent());
@ -88,11 +91,10 @@ rule "MAN.0.1: Apply manual resize redaction"
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
@ -101,11 +103,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$imageEntityToBeRemoved: Image($id == id)
then
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($imageEntityToBeRemoved);
retract($idRemoval);
update($imageEntityToBeRemoved.getParent());
@ -117,13 +118,10 @@ rule "MAN.2.0: Apply force redaction"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToForce: TextEntity(matchesAnnotationId($id))
then
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
$entityToForce.getManualOverwrite().addChange($force);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
end
@ -132,29 +130,61 @@ rule "MAN.2.1: Apply force redaction to images"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToForce: Image(id == $id)
then
$imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$imageToForce.setRemoved(false);
$imageToForce.setIgnored(false);
$imageToForce.getManualOverwrite().addChange($force);
update($imageToForce);
update($imageToForce.getParent());
end
// Rule unit: MAN.3
rule "MAN.3.0: Apply image recategorization"
rule "MAN.3.0: Apply entity recategorization"
salience 128
when
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
then
// Notify the intersecting nodes before recategorizing.
// NOTE(review): presumably recategorize() detaches the original entity from its nodes, so they are read first - confirm.
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
retract($recategorization);
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
retract($entityToBeRecategorized);
end
rule "MAN.3.1: Apply image recategorization"
salience 128
when
// An approved recategorization and the image whose id equals its annotation id.
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeRecategorized: Image($id == id)
then
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
// Re-evaluate rules for the image and its parent node, then consume the request so it fires only once.
update($imageToBeRecategorized);
update($imageToBeRecategorized.getParent());
retract($recategorization);
end
// Rule unit: MAN.4
rule "MAN.4.0: Apply legal basis change"
    salience 128
    when
        // An approved legal basis change and the image whose id equals its annotation id.
        $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
        $imageToBeChanged: Image($id == id)
    then
        // Record the change on the image's manual overwrite.
        // NOTE(review): unlike MAN.1.x and MAN.2.x, neither update() nor retract() is called here - confirm this is intended.
        $imageToBeChanged.getManualOverwrite().addChange($legalBasisChange);
end
rule "MAN.4.1: Apply legal basis change"
    salience 128
    when
        // An approved legal basis change and the text entity matching its annotation id.
        $change: ManualLegalBasisChange(status == AnnotationStatus.APPROVED, $annotationId: annotationId)
        $targetEntity: TextEntity(matchesAnnotationId($annotationId))
    then
        // Record the change on the entity's manual overwrite.
        $targetEntity.getManualOverwrite().addChange($change);
end
//------------------------------------ Local dictionary search rules ------------------------------------
// Rule unit: LDS.0

View File

@ -33,23 +33,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global ManualChangesApplicationService manualChangesApplicationService
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
@ -315,7 +316,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert
// Rule unit: CBI.14
rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\""
when
// Matches CBI_sponsor entities whose preceding text contains the phrase "batches produced at".
$sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
$sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
then
// Arguments: rule identifier, reason, legal basis.
$sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
@ -418,7 +419,7 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with
rule "CBI.18.0: Expand CBI_author entities with firstname initials"
no-loop true
when
$entityToExpand: RedactionEntity(type == "CBI_author",
$entityToExpand: TextEntity(type == "CBI_author",
value.matches("[^\\s]+"),
textAfter.startsWith(" "),
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
@ -426,7 +427,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
then
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
.ifPresent(expandedEntity -> {
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove("CBI.18.0", "Expand CBI_author entities with firstname initials");
retract($entityToExpand);
});
@ -436,11 +437,11 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
// Rule unit: CBI.19
rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
when
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
$entityToExpand: TextEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> {
expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList());
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove("CBI.19.0", "Expand CBI_author entities with salutation prefix");
retract($entityToExpand);
});
@ -481,7 +482,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
rule "PII.0.0: Redact all PII (non vertebrate study)"
when
// Fires only when no "Vertebrate Study" file attribute has the value "yes" (case-insensitive).
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
// Restricted to PII entities flagged as dictionary entries.
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -489,7 +490,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
rule "PII.0.1: Redact all PII (vertebrate study)"
when
// Fires only when a "Vertebrate Study" file attribute has the value "yes" (case-insensitive).
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
// Restricted to PII entities flagged as dictionary entries.
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
// Same reason as PII.0.0, but with Article 39(e)(2) as legal basis.
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -726,10 +727,10 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:"
// Rule unit: PII.12
rule "PII.12.0: Expand PII entities with salutation prefix"
when
// Matches PII entities whose preceding text contains a salutation (Mr, Mrs, Ms, Miss, Sir, Madam(e), Mme).
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
$entityToExpand: TextEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
// If an expanded entity is produced, it receives the matched rules of the original entity.
// NOTE(review): unlike CBI.19.0, the original entity is neither removed nor retracted here - confirm this is intended.
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> expandedEntity.setMatchedRuleList($entityToExpand.getMatchedRuleList()));
.ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()));
end
@ -784,7 +785,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
// Rule unit: ETC.4
rule "ETC.4.0: Redact dossier dictionary entries"
when
// Matches every entity of type "dossier_redaction"; no further constraints are applied.
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
$dossierRedaction: TextEntity(type == "dossier_redaction")
then
// Arguments: rule identifier, reason, legal basis.
$dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -794,7 +795,7 @@ rule "ETC.4.0: Redact dossier dictionary entries"
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
when
not FileAttribute(label == "Confidentiality", value == "confidential")
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
$dossierRedaction: TextEntity(type == "dossier_redaction")
then
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
update($dossierRedaction);
@ -874,10 +875,10 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeResized: TextEntity(matchesAnnotationId($id))
then
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
@ -886,10 +887,10 @@ rule "MAN.0.0: Apply manual resize redaction"
rule "MAN.0.1: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeResized: Image(id == $id)
then
manualRedactionApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($imageToBeResized);
update($imageToBeResized.getParent());
@ -900,11 +901,10 @@ rule "MAN.0.1: Apply manual resize redaction"
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
@ -913,11 +913,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$imageEntityToBeRemoved: Image($id == id)
then
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($imageEntityToBeRemoved);
retract($idRemoval);
update($imageEntityToBeRemoved.getParent());
@ -929,13 +928,10 @@ rule "MAN.2.0: Apply force redaction"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToForce: TextEntity(matchesAnnotationId($id))
then
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
$entityToForce.getManualOverwrite().addChange($force);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
end
@ -944,39 +940,70 @@ rule "MAN.2.1: Apply force redaction to images"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToForce: Image(id == $id)
then
$imageToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$imageToForce.setRemoved(false);
$imageToForce.setIgnored(false);
$imageToForce.getManualOverwrite().addChange($force);
update($imageToForce);
update($imageToForce.getParent());
end
// Rule unit: MAN.3
rule "MAN.3.0: Apply image recategorization"
rule "MAN.3.0: Apply entity recategorization"
salience 128
when
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id))
then
// Notify the nodes intersecting the original entity before it is recategorized.
// NOTE(review): presumably recategorize() detaches the original entity from these nodes, hence the ordering - confirm.
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization);
// Consume the request so the rule fires only once per recategorization.
retract($recategorization);
// Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
retract($entityToBeRecategorized);
end
rule "MAN.3.1: Apply image recategorization"
salience 128
when
// An approved recategorization and the image whose id equals its annotation id.
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeRecategorized: Image($id == id)
then
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
// Re-evaluate rules for the image and its parent node, then consume the request so it fires only once.
update($imageToBeRecategorized);
update($imageToBeRecategorized.getParent());
retract($recategorization);
end
// Rule unit: MAN.4
rule "MAN.4.0: Apply legal basis change"
    salience 128
    when
        // An approved legal basis change and the image whose id equals its annotation id.
        $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
        $imageToBeChanged: Image($id == id)
    then
        // Record the change on the image's manual overwrite.
        // NOTE(review): unlike MAN.1.x and MAN.2.x, neither update() nor retract() is called here - confirm this is intended.
        $imageToBeChanged.getManualOverwrite().addChange($legalBasisChange);
end
rule "MAN.4.1: Apply legal basis change"
    salience 128
    when
        // An approved legal basis change and the text entity matching its annotation id.
        $change: ManualLegalBasisChange(status == AnnotationStatus.APPROVED, $annotationId: annotationId)
        $targetEntity: TextEntity(matchesAnnotationId($annotationId))
    then
        // Record the change on the entity's manual overwrite.
        $targetEntity.getManualOverwrite().addChange($change);
end
//------------------------------------ Entity merging rules ------------------------------------
// Rule unit: X.0
rule "X.0.0: remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$larger: TextEntity($type: type, $entityType: entityType, active())
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
then
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
retract($contained);
@ -987,10 +1014,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
rule "X.1.0: merge intersecting Entities of same type"
salience 64
when
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
then
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
$first.remove("X.1.0", "merge intersecting Entities of same type");
$second.remove("X.1.0", "merge intersecting Entities of same type");
retract($first);
@ -1003,8 +1030,8 @@ rule "X.1.0: merge intersecting Entities of same type"
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
salience 64
when
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
then
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
@ -1016,8 +1043,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
salience 64
when
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
retract($recommendation);
@ -1028,8 +1055,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
salience 256
when
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$entity.addEngines($recommendation.getEngines());
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
@ -1041,8 +1068,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
salience 256
when
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity(entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
retract($recommendation);
@ -1053,8 +1080,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
salience 32
when
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
then
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");

View File

@ -34,22 +34,22 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global ManualChangesApplicationService manualChangesApplicationService
global Dictionary dictionary
// --------------------------------------- queries -------------------------------------------------------------------
@ -64,7 +64,7 @@ rule "add NER Entities of type CBI_author or CBI_address"
when
$nerEntity: EntityRecognitionEntity($type: type, (type == "CBI_author" || type == "CBI_address"))
then
entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document)
entityCreationService.byBoundary(new TextRange($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document)
.ifPresent(redactionEntity -> insert(redactionEntity));
end
@ -73,7 +73,7 @@ rule "add NER Entities of type CBI_author or CBI_address"
// Fires for every fact of type "CBI_author" whose entityType is EntityType.ENTITY
// and calls apply(...) on it with the identifier "CBI.0.0", the reason
// "Author found" and the regulation reference string.
// NOTE(review): two pattern lines bind $cbiAuthor (RedactionEntity and TextEntity).
// This looks like the before/after pair of the RedactionEntity -> TextEntity rename;
// presumably only the TextEntity line belongs in the final rule file — confirm.
rule "Always redact CBI_author"
when
$cbiAuthor: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY)
$cbiAuthor: TextEntity(type == "CBI_author", entityType == EntityType.ENTITY)
then
$cbiAuthor.apply("CBI.0.0", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -83,7 +83,7 @@ rule "Always redact CBI_author"
// Fires for every fact of type "PII" whose entityType is EntityType.ENTITY and
// calls apply(...) on it with the identifier "PII.0.0", the reason "PII found"
// and the regulation reference string.
// NOTE(review): the binding is named $cbiAuthor although the pattern matches
// type "PII" — looks like a copy-paste leftover; a name such as $pii would be clearer.
// NOTE(review): the RedactionEntity and TextEntity pattern lines appear to be the
// before/after pair of a type rename; presumably only one should remain — confirm.
rule "Always redact PII"
when
$cbiAuthor: RedactionEntity(type == "PII", entityType == EntityType.ENTITY)
$cbiAuthor: TextEntity(type == "PII", entityType == EntityType.ENTITY)
then
$cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -96,8 +96,8 @@ rule "Always redact PII"
rule "X.0.0: remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$larger: TextEntity($type: type, $entityType: entityType, active())
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
then
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
retract($contained);
@ -108,10 +108,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
rule "X.1.0: merge intersecting Entities of same type"
salience 64
when
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
then
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
$first.remove("X.1.0", "merge intersecting Entities of same type");
$second.remove("X.1.0", "merge intersecting Entities of same type");
retract($first);
@ -124,8 +124,8 @@ rule "X.1.0: merge intersecting Entities of same type"
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
salience 64
when
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
then
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
@ -137,8 +137,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
salience 64
when
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
retract($recommendation);
@ -149,8 +149,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
salience 256
when
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$entity.addEngines($recommendation.getEngines());
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
@ -162,8 +162,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
salience 256
when
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity(entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
retract($recommendation);
@ -174,8 +174,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
salience 32
when
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
then
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");

View File

@ -33,22 +33,23 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.TextRange;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global ManualChangesApplicationService manualChangesApplicationService
global NerEntitiesAdapter nerEntitiesAdapter
global Dictionary dictionary
@ -77,7 +78,7 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
// CBI.0.0 — applies "CBI_author" entities when NO FileAttribute labelled
// "Vertebrate Study" with a (case-insensitive) value of "yes" is present.
// The `dictionaryEntry` constraint restricts the match to entities whose
// dictionaryEntry property is true.
// Uses the Article 39(e)(3) reference; the vertebrate variant CBI.0.1 uses 39(e)(2).
// NOTE(review): the RedactionEntity and TextEntity pattern lines both bind $entity;
// they appear to be the before/after pair of a type rename — confirm which one is live.
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
then
$entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -85,7 +86,7 @@ rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
// CBI.0.1 — applies "CBI_author" entities when a FileAttribute labelled
// "Vertebrate Study" with a (case-insensitive) value of "yes" IS present.
// Only entities whose dictionaryEntry property is true are matched.
// Uses the Article 39(e)(2) reference.
// NOTE(review): the RedactionEntity and TextEntity pattern lines both bind $entity;
// they appear to be the before/after pair of a type rename — confirm which one is live.
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
$entity: TextEntity(type == "CBI_author", dictionaryEntry)
then
$entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -95,7 +96,7 @@ rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
// CBI.1.0 — for documents WITHOUT a "Vertebrate Study" = "yes" file attribute,
// calls skip(...) (not apply) on "CBI_address" entities whose dictionaryEntry
// property is true, recording the identifier "CBI.1.0" and the given reason.
// NOTE(review): the RedactionEntity and TextEntity pattern lines both bind $entity;
// they appear to be the before/after pair of a type rename — confirm which one is live.
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
then
$entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
end
@ -103,7 +104,7 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
// CBI.1.1 — for documents WITH a "Vertebrate Study" = "yes" file attribute,
// calls apply(...) on "CBI_address" entities whose dictionaryEntry property is
// true, using the Article 39(e)(2) reference.
// NOTE(review): the RedactionEntity and TextEntity pattern lines both bind $entity;
// they appear to be the before/after pair of a type rename — confirm which one is live.
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
$entity: TextEntity(type == "CBI_address", dictionaryEntry)
then
$entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -112,9 +113,9 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
// Rule unit: CBI.2
// CBI.2.0 — handles genitive author mentions: matches an applied "CBI_author"
// entity whose textAfter matches an apostrophe-like character followed by "s".
// For each match it asks entityCreationService for a FALSE_POSITIVE entity of
// type "CBI_author" over the same range and, if one is returned, calls skip(...)
// on that new entity with the identifier "CBI.2.0".
// NOTE(review): the pattern and the byBoundary call each appear twice
// (isApplied()/getBoundary() vs applied()/getTextRange()); these look like the
// before/after lines of an API rename — confirm which pair is live.
rule "CBI.2.0: Don't redact genitive CBI_author"
when
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), isApplied())
$entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), applied())
then
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
entityCreationService.byBoundary($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document)
.ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found"));
end
@ -299,7 +300,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
// PII.0.0 — applies "PII" entities whose dictionaryEntry property is true when
// NO FileAttribute labelled "Vertebrate Study" with value "yes" (case-insensitive)
// is present. Uses the Article 39(e)(3) reference.
// NOTE(review): the RedactionEntity and TextEntity pattern lines both bind $pii;
// they appear to be the before/after pair of a type rename — confirm which one is live.
rule "PII.0.0: Redact all PII (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -307,7 +308,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
// PII.0.1 — applies "PII" entities whose dictionaryEntry property is true when a
// FileAttribute labelled "Vertebrate Study" with value "yes" (case-insensitive)
// IS present. Uses the Article 39(e)(2) reference.
// NOTE(review): the RedactionEntity and TextEntity pattern lines both bind $pii;
// they appear to be the before/after pair of a type rename — confirm which one is live.
rule "PII.0.1: Redact all PII (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
$pii: TextEntity(type == "PII", dictionaryEntry)
then
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -453,7 +454,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
when
not FileAttribute(label == "Confidentiality", value == "confidential")
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
$dossierRedaction: TextEntity(type == "dossier_redaction")
then
$dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential");
update($dossierRedaction);
@ -492,25 +493,36 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
// MAN.0.0 — applies a manual resize to the text entity it targets.
// Pairs a ManualResizeRedaction with the entity for which
// matchesAnnotationId(annotationId) holds, hands both to resizeEntityAndReinsert,
// retracts the resize fact, then calls update(...) on the entity and on every
// node returned by getIntersectingNodes() so the engine re-evaluates them.
// Salience 128 is the same priority used by the other MAN.* rules in this file.
// NOTE(review): the patterns and the service call each appear twice
// (RedactionEntity/manualRedactionApplicationService without a status check vs
// TextEntity/manualChangesApplicationService with status == APPROVED); these look
// like the before/after lines of a refactoring — confirm which set is live.
rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeResized: TextEntity(matchesAnnotationId($id))
then
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
end
// MAN.0.1 — applies an approved manual resize to the Image whose id equals the
// resize's annotationId. The resize is delegated to
// manualChangesApplicationService.resizeImage; afterwards the resize fact is
// retracted and both the image and its parent node are pushed back to the engine.
rule "MAN.0.1: Apply manual resize redaction"
    salience 128
    when
        $resize: ManualResizeRedaction(status == AnnotationStatus.APPROVED, $annotationId: annotationId)
        $image: Image($annotationId == id)
    then
        manualChangesApplicationService.resizeImage($image, $resize);
        retract($resize);
        update($image);
        update($image.getParent());
end
// Rule unit: MAN.1
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToBeRemoved: TextEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.ignore("MAN.1.0", "Removed by ManualRedaction");
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
@ -519,11 +531,10 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED)
$imageEntityToBeRemoved: Image($id == id)
then
$imageEntityToBeRemoved.ignore("MAN.1.1", "Removed by ManualRedaction");
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($imageEntityToBeRemoved);
retract($idRemoval);
update($imageEntityToBeRemoved.getParent());
@ -535,31 +546,72 @@ rule "MAN.2.0: Apply force redaction"
no-loop true
salience 128
when
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$entityToForce: TextEntity(matchesAnnotationId($id))
then
$entityToForce.force("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
$entityToForce.getManualOverwrite().addChange($force);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
end
// Rule unit: MAN.3
rule "MAN.3.0: Apply image recategorization"
rule "MAN.2.1: Apply force redaction to images"
no-loop true
salience 128
when
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToForce: Image(id == $id)
then
$imageToForce.getManualOverwrite().addChange($force);
update($imageToForce);
update($imageToForce.getParent());
end
// Rule unit: MAN.3
// MAN.3.0 — applies an approved recategorization to the text entity for which
// matchesAnnotationId(annotationId) holds. The nodes intersecting the entity are
// updated first, then the change is delegated to
// manualChangesApplicationService.recategorize and both facts are retracted.
rule "MAN.3.0: Apply entity recategorization"
    salience 128
    when
        $change: ManualImageRecategorization(status == AnnotationStatus.APPROVED, $annotationId: annotationId)
        $entity: TextEntity(matchesAnnotationId($annotationId))
    then
        $entity.getIntersectingNodes().forEach(intersectingNode -> update(intersectingNode));
        manualChangesApplicationService.recategorize($entity, $change);
        retract($change);
        // The recategorized entity is a copy that gets inserted separately, so the
        // original must leave working memory or it would show up twice.
        retract($entity);
end
// MAN.3.1 — applies an approved recategorization to the Image whose id equals
// the change's annotationId, then calls update(...) on the image and its parent
// and retracts the recategorization fact.
// NOTE(review): the setImageType(...) line references $imageType, which no
// pattern visible in this rule binds; it looks like the pre-change statement that
// the recategorize(...) call replaces — confirm only the service call is live.
rule "MAN.3.1: Apply image recategorization"
salience 128
when
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED)
$imageToBeRecategorized: Image($id == id)
then
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization);
update($imageToBeRecategorized);
update($imageToBeRecategorized.getParent());
retract($recategorization);
end
// Rule unit: MAN.4
// MAN.4.0 — records an approved manual legal-basis change on the Image whose id
// equals the change's annotationId by adding it to the image's manual overwrite.
// Bindings are named after what they hold ($legalBasisChange / $imageToBeChanged)
// instead of the names copied from the recategorization rule.
// NOTE(review): unlike the MAN.1.1 / MAN.2.1 image rules, no update(...) is called
// here, so the engine is not told the image changed — confirm this is intended.
// If an update is added, the rule also needs `no-loop true` to avoid re-firing.
rule "MAN.4.0: Apply legal basis change"
    salience 128
    when
        $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED)
        $imageToBeChanged: Image($id == id)
    then
        $imageToBeChanged.getManualOverwrite().addChange($legalBasisChange);
end
// MAN.4.1 — records an approved manual legal-basis change on the text entity for
// which matchesAnnotationId(annotationId) holds, by adding the change to the
// entity's manual overwrite. Nothing is retracted or updated here.
rule "MAN.4.1: Apply legal basis change"
    salience 128
    when
        $change: ManualLegalBasisChange(status == AnnotationStatus.APPROVED, $annotationId: annotationId)
        $entity: TextEntity(matchesAnnotationId($annotationId))
    then
        $entity.getManualOverwrite().addChange($change);
end
//------------------------------------ Entity merging rules ------------------------------------
@ -567,8 +619,8 @@ rule "MAN.3.0: Apply image recategorization"
rule "X.0.0: remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$larger: TextEntity($type: type, $entityType: entityType, active())
$contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active())
then
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
retract($contained);
@ -579,10 +631,10 @@ rule "X.0.0: remove Entity contained by Entity of same type"
rule "X.1.0: merge intersecting Entities of same type"
salience 64
when
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$first: TextEntity($type: type, $entityType: entityType, !resized(), active())
$second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized(), active())
then
RedactionEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document);
$first.remove("X.1.0", "merge intersecting Entities of same type");
$second.remove("X.1.0", "merge intersecting Entities of same type");
retract($first);
@ -595,8 +647,8 @@ rule "X.1.0: merge intersecting Entities of same type"
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
salience 64
when
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE, isActive())
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
$entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active())
then
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
@ -608,8 +660,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
salience 64
when
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, isActive())
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
$recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
retract($recommendation);
@ -620,8 +672,8 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
salience 256
when
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$entity.addEngines($recommendation.getEngines());
$recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
@ -633,8 +685,8 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
salience 256
when
$entity: RedactionEntity(entityType == EntityType.ENTITY, isActive())
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$entity: TextEntity(entityType == EntityType.ENTITY, active())
$recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active())
then
$recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
retract($recommendation);
@ -645,8 +697,8 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
salience 32
when
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY, isActive())
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger, isActive())
$higherRank: TextEntity($type: type, entityType == EntityType.ENTITY, active())
$lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active())
then
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY");