Merge branch 'RED-6929' into 'master'

RED-6929: fix acceptance tests/rules

Closes RED-6929

See merge request redactmanager/redaction-service!32
This commit is contained in:
Kilian Schüttler 2023-07-05 21:50:46 +02:00
commit ee65044578
45 changed files with 2083 additions and 397 deletions

View File

@ -9,6 +9,7 @@ import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -72,6 +73,8 @@ public class RedactionLogEntryAdapter {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
.stream()
.map(boundary -> entityCreationService.byBoundary(boundary, "temp", EntityType.ENTITY, node))
.filter(Optional::isPresent)
.map(Optional::get)
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
}
@ -100,8 +103,7 @@ public class RedactionLogEntryAdapter {
RedactionEntity correctEntity = entityCreationService.byBoundary(closestEntity.getBoundary(),
redactionLogEntry.getType(),
redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY,
node);
node).orElseThrow();
String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";
if (redactionLogEntry.isRedacted()) {
correctEntity.apply(ruleIdentifier, redactionLogEntry.getReason(), redactionLogEntry.getLegalBasis());

View File

@ -51,6 +51,8 @@ import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.text.TextPositionComparator;
import org.apache.pdfbox.util.QuickSort;
import com.iqser.red.service.persistence.service.v1.api.shared.model.utils.SuppressFBWarnings;
/**
* This is a copy of the PDFBox class; only lines 594-607 were adjusted to work around a bug in PDFBox.
* see S416.pdf
@ -1737,6 +1739,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine {
}
@SuppressFBWarnings
private static Map<Character, Character> MIRRORING_CHAR_MAP = new HashMap<>();
static {

View File

@ -83,7 +83,7 @@ public class DocumentGraphFactory {
List<TextPageBlock> textBlocks = new ArrayList<>(textBlocksToMerge);
textBlocks.add(originalTextBlock);
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page);
AtomicTextBlock textBlock = context.textBlockFactory.fromContext(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page);
List<Integer> treeId = context.documentTree.createNewChildEntryAndReturnId(parentNode, node);
node.setLeafTextBlock(textBlock);
node.setTreeId(treeId);
@ -145,10 +145,7 @@ public class DocumentGraphFactory {
Page page = context.getPage(textBlocks.get(0).getPage());
Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks),
footer,
context,
page);
AtomicTextBlock textBlock = context.textBlockFactory.fromContext(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), footer, context, page);
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
footer.setTreeId(tocId);
footer.setLeafTextBlock(textBlock);
@ -160,7 +157,7 @@ public class DocumentGraphFactory {
Page page = context.getPage(textBlocks.get(0).getPage());
Header header = Header.builder().documentTree(context.getDocumentTree()).build();
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page);
AtomicTextBlock textBlock = context.textBlockFactory.fromNumberOnPage(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page);
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
header.setTreeId(tocId);
header.setLeafTextBlock(textBlock);

View File

@ -80,7 +80,7 @@ public class SectionNodeFactory {
remainingBlocks.removeAll(alreadyMerged);
if (abstractPageBlock instanceof TextPageBlock) {
List<TextPageBlock> textBlocks = findTextBlocksWithSameClassificationAndAlignsY(abstractPageBlock, remainingBlocks);
List<TextPageBlock> textBlocks = findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(abstractPageBlock, remainingBlocks);
alreadyMerged.addAll(textBlocks);
DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, textBlocks);
} else if (abstractPageBlock instanceof TablePageBlock tablePageBlock) {
@ -162,14 +162,15 @@ public class SectionNodeFactory {
}
private List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsY(AbstractPageBlock atc, List<AbstractPageBlock> pageBlocks) {
private List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(AbstractPageBlock atc, List<AbstractPageBlock> pageBlocks) {
return pageBlocks.stream()
.filter(abstractTextContainer -> !abstractTextContainer.equals(atc))
.filter(abstractTextContainer -> abstractTextContainer.getPage() == atc.getPage())
.filter(abstractTextContainer -> abstractTextContainer instanceof TextPageBlock)
.filter(abstractTextContainer -> abstractTextContainer.intersectsY(atc))
.map(abstractTextContainer -> (TextPageBlock) abstractTextContainer)
.filter(abstractPageBlock -> !abstractPageBlock.equals(atc))
.filter(abstractPageBlock -> abstractPageBlock.getPage() == atc.getPage())
.filter(abstractPageBlock -> abstractPageBlock.getOrientation().equals(atc.getOrientation()))
.filter(abstractPageBlock -> abstractPageBlock.intersectsY(atc))
.filter(abstractPageBlock -> abstractPageBlock instanceof TextPageBlock)
.map(abstractPageBlock -> (TextPageBlock) abstractPageBlock)
.toList();
}

View File

@ -33,7 +33,11 @@ public class TableNodeFactory {
Set<Page> pages = tablesToMerge.stream().map(AbstractPageBlock::getPage).map(context::getPage).collect(Collectors.toSet());
List<List<Cell>> mergedRows = tablesToMerge.stream().map(TablePageBlock::getRows).flatMap(Collection::stream).toList();
Table table = Table.builder().documentTree(context.getDocumentTree()).numberOfCols(mergedRows.isEmpty() ? 0 :mergedRows.get(0).size()).numberOfRows(mergedRows.size()).build();
Table table = Table.builder()
.documentTree(context.getDocumentTree())
.numberOfCols(mergedRows.isEmpty() ? 0 : mergedRows.get(0).size())
.numberOfRows(mergedRows.size())
.build();
pages.forEach(page -> addTableToPage(page, parentNode, table));
@ -109,13 +113,13 @@ public class TableNodeFactory {
if (cell.getTextBlocks().isEmpty()) {
tableCell.setLeafTextBlock(context.getTextBlockFactory().emptyTextBlock(tableNode, context, page));
} else if (cell.getTextBlocks().size() == 1) {
textBlock = context.getTextBlockFactory().buildAtomicTextBlock(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page);
textBlock = context.getTextBlockFactory().fromContext(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page);
tableCell.setLeafTextBlock(textBlock);
} else if (firstTextBlockIsHeadline(cell)) {
SectionNodeFactory.addSection(tableCell, cell.getTextBlocks().stream().map(tb -> (AbstractPageBlock) tb).toList(), emptyList(), context);
} else if (cellAreaIsSmallerThanPageAreaTimesThreshold(cell, page)) {
List<TextPositionSequence> sequences = TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(cell.getTextBlocks());
textBlock = context.getTextBlockFactory().buildAtomicTextBlock(sequences, tableCell, context, page);
textBlock = context.getTextBlockFactory().fromContext(sequences, tableCell, context, page);
tableCell.setLeafTextBlock(textBlock);
} else {
cell.getTextBlocks().forEach(tb -> DocumentGraphFactory.addParagraphOrHeadline(tableCell, tb, context, emptyList()));

View File

@ -17,14 +17,14 @@ public class TextBlockFactory {
long textBlockIdx;
public AtomicTextBlock buildAtomicTextBlock(List<TextPositionSequence> sequences, SemanticNode parent, DocumentGraphFactory.Context context, Page page) {
public AtomicTextBlock fromContext(List<TextPositionSequence> sequences, SemanticNode parent, DocumentGraphFactory.Context context, Page page) {
Integer numberOnPage = context.getAndIncrementTextBlockNumberOnPage(page);
return buildAtomicTextBlock(sequences, parent, numberOnPage, page);
return fromNumberOnPage(sequences, parent, numberOnPage, page);
}
public AtomicTextBlock buildAtomicTextBlock(List<TextPositionSequence> sequences, SemanticNode parent, Integer numberOnPage, Page page) {
public AtomicTextBlock fromNumberOnPage(List<TextPositionSequence> sequences, SemanticNode parent, Integer numberOnPage, Page page) {
SearchTextWithTextPositionDto searchTextWithTextPositionDto = SearchTextWithTextPositionFactory.buildSearchTextToTextPositionModel(sequences);
int offset = stringOffset;

View File

@ -4,18 +4,42 @@ import java.util.Collections;
import java.util.Objects;
import java.util.Set;
public record MatchedRule(RuleIdentifier ruleIdentifier, String reason, String legalBasis, boolean applied, Set<RedactionEntity> references) implements Comparable<MatchedRule> {
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
@Getter
@Builder
@AllArgsConstructor
@EqualsAndHashCode
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public final class MatchedRule implements Comparable<MatchedRule> {
@Builder.Default
RuleIdentifier ruleIdentifier = RuleIdentifier.empty();
@Builder.Default
String reason = "";
@Builder.Default
String legalBasis = "";
boolean applied;
boolean writeValueWithLineBreaks;
@Builder.Default
Set<RedactionEntity> references = Collections.emptySet();
public static MatchedRule empty() {
return new MatchedRule(RuleIdentifier.empty(), "", "", false, Collections.emptySet());
return MatchedRule.builder().build();
}
@Override
public int compareTo(MatchedRule matchedRule) {
RuleIdentifier otherRuleIdentifier = matchedRule.ruleIdentifier();
RuleIdentifier otherRuleIdentifier = matchedRule.getRuleIdentifier();
if (!Objects.equals(ruleIdentifier.type(), otherRuleIdentifier.type())) {
if (Objects.equals(otherRuleIdentifier.type(), "MAN")) {
return 1;
@ -24,10 +48,17 @@ public record MatchedRule(RuleIdentifier ruleIdentifier, String reason, String l
return -1;
}
}
if (!Objects.equals(otherRuleIdentifier.unit(), ruleIdentifier().unit())) {
if (!Objects.equals(otherRuleIdentifier.unit(), getRuleIdentifier().unit())) {
return otherRuleIdentifier.unit() - ruleIdentifier.unit();
}
return otherRuleIdentifier.id() - ruleIdentifier.id();
}
@Override
public String toString() {
return "MatchedRule[" + "ruleIdentifier=" + ruleIdentifier + ", " + "reason=" + reason + ", " + "legalBasis=" + legalBasis + ", " + "applied=" + applied + ", " + "writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", " + "references=" + references + ']';
}
}

View File

@ -0,0 +1,104 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
import java.util.Collection;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;
import lombok.NonNull;
/**
 * Shared behaviour for graph elements that record the rules that matched them.
 * <p>
 * Implementors only supply the backing {@link PriorityQueue}; every default method works on that queue.
 * The "current" rule is the head of the queue, i.e. the least element according to the natural ordering
 * of {@link MatchedRule}. When the queue is empty, {@link MatchedRule#empty()} stands in for it.
 */
public interface MatchedRuleHolder {

    /**
     * @return the queue that stores all rules matched so far
     */
    PriorityQueue<MatchedRule> getMatchedRuleList();


    /**
     * @return the {@code applied} flag of the current rule
     */
    default boolean isApplied() {

        return getMatchedRule().isApplied();
    }


    /**
     * @return the references of the current rule
     */
    default Set<RedactionEntity> getReferences() {

        return getMatchedRule().getReferences();
    }


    /**
     * Records an applied rule.
     *
     * @param ruleIdentifier textual rule identifier, parsed with {@link RuleIdentifier#fromString}
     * @param reason         reason stored on the rule
     * @param legalBasis     legal basis stored on the rule; must not be blank
     * @throws IllegalArgumentException if {@code legalBasis} is blank
     */
    default void apply(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) {

        requireLegalBasis(legalBasis);
        MatchedRule rule = MatchedRule.builder()
                .ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
                .reason(reason)
                .legalBasis(legalBasis)
                .applied(true)
                .build();
        getMatchedRuleList().add(rule);
    }


    /**
     * Records an applied rule that additionally sets {@code writeValueWithLineBreaks}.
     *
     * @param ruleIdentifier textual rule identifier, parsed with {@link RuleIdentifier#fromString}
     * @param reason         reason stored on the rule
     * @param legalBasis     legal basis stored on the rule; must not be blank
     * @throws IllegalArgumentException if {@code legalBasis} is blank
     */
    default void applyWithLineBreaks(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) {

        requireLegalBasis(legalBasis);
        MatchedRule rule = MatchedRule.builder()
                .ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
                .reason(reason)
                .legalBasis(legalBasis)
                .applied(true)
                .writeValueWithLineBreaks(true)
                .build();
        getMatchedRuleList().add(rule);
    }


    /**
     * Records an applied rule together with a copy of the given references.
     *
     * @param ruleIdentifier textual rule identifier, parsed with {@link RuleIdentifier#fromString}
     * @param reason         reason stored on the rule
     * @param legalBasis     legal basis stored on the rule; must not be blank
     * @param references     entities copied into a new {@link HashSet}
     * @throws IllegalArgumentException if {@code legalBasis} is blank
     */
    default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection<RedactionEntity> references) {

        requireLegalBasis(legalBasis);
        MatchedRule rule = MatchedRule.builder()
                .ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
                .reason(reason)
                .legalBasis(legalBasis)
                .applied(true)
                .references(new HashSet<>(references))
                .build();
        getMatchedRuleList().add(rule);
    }


    /**
     * Records a rule that matched but was not applied; no legal basis is set.
     *
     * @param ruleIdentifier textual rule identifier, parsed with {@link RuleIdentifier#fromString}
     * @param reason         reason stored on the rule
     */
    default void skip(@NonNull String ruleIdentifier, String reason) {

        MatchedRule rule = MatchedRule.builder()
                .ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
                .reason(reason)
                .build();
        getMatchedRuleList().add(rule);
    }


    /**
     * Records a rule that matched but was not applied, together with a copy of the given references.
     *
     * @param ruleIdentifier textual rule identifier, parsed with {@link RuleIdentifier#fromString}
     * @param reason         reason stored on the rule
     * @param references     entities copied into a new {@link HashSet}
     */
    default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection<RedactionEntity> references) {

        MatchedRule rule = MatchedRule.builder()
                .ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
                .reason(reason)
                .references(new HashSet<>(references))
                .build();
        getMatchedRuleList().add(rule);
    }


    /**
     * Adds an already built rule to the queue.
     */
    default void addMatchedRule(MatchedRule matchedRule) {

        getMatchedRuleList().add(matchedRule);
    }


    /**
     * Adds all given rules to the queue.
     */
    default void addMatchedRules(Collection<MatchedRule> matchedRules) {

        getMatchedRuleList().addAll(matchedRules);
    }


    /**
     * @return the {@code unit} of the current rule's identifier
     */
    default int getMatchedRuleUnit() {

        return getMatchedRule().getRuleIdentifier().unit();
    }


    /**
     * @return the head of the queue, or {@link MatchedRule#empty()} when nothing has matched yet
     */
    default MatchedRule getMatchedRule() {

        // PriorityQueue rejects null elements, so a null head can only mean "queue is empty".
        MatchedRule head = getMatchedRuleList().peek();
        return head == null ? MatchedRule.empty() : head;
    }


    // Rejects a blank legal basis; isBlank() already covers the empty string.
    private static void requireLegalBasis(String legalBasis) {

        if (legalBasis.isBlank()) {
            throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
        }
    }

}

View File

@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.e
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
@ -22,7 +21,6 @@ import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -30,7 +28,7 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class RedactionEntity {
public class RedactionEntity implements MatchedRuleHolder {
// initial values
@EqualsAndHashCode.Include
@ -55,7 +53,6 @@ public class RedactionEntity {
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
// inferred on graph insertion
@EqualsAndHashCode.Include
String value;
String textBefore;
String textAfter;
@ -73,18 +70,6 @@ public class RedactionEntity {
}
public boolean isApplied() {
return getMatchedRule().applied();
}
public Set<RedactionEntity> getReferences() {
return getMatchedRule().references();
}
public boolean occursInNodeOfType(Class<? extends SemanticNode> clazz) {
return intersectingNodes.stream().anyMatch(clazz::isInstance);
@ -121,6 +106,12 @@ public class RedactionEntity {
}
/**
* Returns the text covered by this entity's boundary with line breaks preserved, as produced by
* {@code subSequenceWithLineBreaks} on the text block of the deepest fully containing node.
* NOTE(review): removeFromGraph() sets deepestFullyContainingNode to null, so calling this afterwards
* presumably fails with a NullPointerException - confirm against the generated getter.
*
* @return the entity value with line breaks
*/
public String getValueWithLineBreaks() {
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getBoundary());
}
public void removeFromGraph() {
intersectingNodes.forEach(node -> node.getEntities().remove(this));
@ -129,67 +120,21 @@ public class RedactionEntity {
deepestFullyContainingNode = null;
pages = new HashSet<>();
removed = true;
}
public void remove() {
removed = true;
}
public void ignore() {
ignored = true;
}
public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) {
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
}
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet()));
}
public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection<RedactionEntity> references) {
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
}
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references)));
}
public void skip(@NonNull String ruleIdentifier, String comment) {
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet()));
}
public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection<RedactionEntity> references) {
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, new HashSet<>(references)));
}
public void addMatchedRule(MatchedRule matchedRule) {
matchedRuleList.add(matchedRule);
}
public void addMatchedRules(Collection<MatchedRule> matchedRules) {
matchedRuleList.addAll(matchedRules);
}
public int getMatchedRuleUnit() {
return getMatchedRule().ruleIdentifier().unit();
}
public MatchedRule getMatchedRule() {
if (matchedRuleList.isEmpty()) {
return MatchedRule.empty();
}
return matchedRuleList.peek();
}
public List<RedactionPosition> getRedactionPositionsPerPage() {
if (redactionPositionsPerPage == null || redactionPositionsPerPage.isEmpty()) {

View File

@ -35,6 +35,8 @@ public class Document implements GenericSemanticNode {
TextBlock textBlock;
@Builder.Default
Set<RedactionEntity> entities = new HashSet<>();
@Builder.Default
static final SectionIdentifier sectionIdentifier = SectionIdentifier.document();
@Override
@ -79,6 +81,13 @@ public class Document implements GenericSemanticNode {
}
/**
* Returns the constant identifier of the document node, which is created via {@code SectionIdentifier.document()}.
*
* @return the document-level SectionIdentifier
*/
@Override
public SectionIdentifier getSectionIdentifier() {
return sectionIdentifier;
}
@Override
public Headline getHeadline() {

View File

@ -23,6 +23,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Footer implements GenericSemanticNode {
@Builder.Default
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
List<Integer> treeId;
TextBlock leafTextBlock;
@ -55,6 +58,13 @@ public class Footer implements GenericSemanticNode {
}
/**
* Returns the constant identifier used for footers, which is created via {@code SectionIdentifier.empty()}.
*
* @return the empty SectionIdentifier
*/
@Override
public SectionIdentifier getSectionIdentifier() {
return sectionIdentifier;
}
@Override
public String toString() {

View File

@ -23,6 +23,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Header implements GenericSemanticNode {
@Builder.Default
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
List<Integer> treeId;
TextBlock leafTextBlock;
@ -55,6 +58,13 @@ public class Header implements GenericSemanticNode {
}
/**
* Returns the constant identifier used for headers, which is created via {@code SectionIdentifier.empty()}.
*
* @return the empty SectionIdentifier
*/
@Override
public SectionIdentifier getSectionIdentifier() {
return sectionIdentifier;
}
@Override
public String toString() {

View File

@ -26,6 +26,7 @@ public class Headline implements GenericSemanticNode {
List<Integer> treeId;
TextBlock leafTextBlock;
SectionIdentifier sectionIdentifier;
@EqualsAndHashCode.Exclude
DocumentTree documentTree;
@ -70,12 +71,24 @@ public class Headline implements GenericSemanticNode {
}
/**
* Returns the section identifier of this headline, deriving it from the headline's search text on first access.
* The derived value is stored in the {@code sectionIdentifier} field and reused by later calls.
*
* @return the stored or freshly parsed SectionIdentifier
*/
@Override
public SectionIdentifier getSectionIdentifier() {
if (sectionIdentifier == null) {
// not set yet: parse it from this headline's own text
sectionIdentifier = SectionIdentifier.fromSearchText(getTextBlock().getSearchText());
}
return sectionIdentifier;
}
public static Headline empty() {
return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build();
}
public boolean hasParagraphs(){
public boolean hasParagraphs() {
return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent();
}

View File

@ -1,7 +1,6 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@ -12,8 +11,8 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRuleHolder;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
@ -23,7 +22,6 @@ import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -31,7 +29,7 @@ import lombok.experimental.FieldDefaults;
@AllArgsConstructor
@NoArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Image implements GenericSemanticNode {
public class Image implements GenericSemanticNode, MatchedRuleHolder {
List<Integer> treeId;
String id;
@ -40,6 +38,7 @@ public class Image implements GenericSemanticNode {
boolean transparent;
Rectangle2D position;
boolean removed;
boolean ignored;
@Builder.Default
@ -56,72 +55,21 @@ public class Image implements GenericSemanticNode {
Set<RedactionEntity> entities = new HashSet<>();
public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) {
public boolean isActive() {
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
}
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet()));
return !removed && !ignored;
}
public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection<RedactionEntity> references) {
public void ignore() {
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
}
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references)));
ignored = true;
}
public void skip(@NonNull String ruleIdentifier, String comment) {
public void remove() {
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet()));
}
public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection<RedactionEntity> references) {
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, new HashSet<>(references)));
}
public void addMatchedRule(MatchedRule matchedRule) {
matchedRuleList.add(matchedRule);
}
public void addMatchedRules(Collection<MatchedRule> matchedRules) {
matchedRuleList.addAll(matchedRules);
}
public boolean isApplied() {
return getMatchedRule().applied();
}
public Set<RedactionEntity> getReferences() {
return getMatchedRule().references();
}
public int getMatchedRuleUnit() {
return getMatchedRule().ruleIdentifier().unit();
}
public MatchedRule getMatchedRule() {
if (matchedRuleList.isEmpty()) {
return MatchedRule.empty();
}
return matchedRuleList.peek();
removed = true;
}

View File

@ -8,7 +8,6 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import java.util.stream.Stream;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -49,6 +48,13 @@ public class Section implements GenericSemanticNode {
}
/**
* Returns the section identifier of this section's headline, unchanged.
*
* @return the SectionIdentifier reported by {@code getHeadline()}
*/
@Override
public SectionIdentifier getSectionIdentifier() {
return getHeadline().getSectionIdentifier();
}
@Override
public TextBlock getTextBlock() {
@ -75,19 +81,22 @@ public class Section implements GenericSemanticNode {
}
public boolean anyHeadlineContainsString(String value){
public boolean anyHeadlineContainsString(String value) {
return streamChildrenOfType(NodeType.HEADLINE)//
.map(node -> (Headline) node).anyMatch(h -> h.containsString(value));
}
public boolean anyHeadlineContainsStringIgnoreCase(String value){
public boolean anyHeadlineContainsStringIgnoreCase(String value) {
return streamChildrenOfType(NodeType.HEADLINE)//
.map(node -> (Headline) node).anyMatch(h -> h.containsStringIgnoreCase(value));
}
public boolean hasParagraphs(){
public boolean hasParagraphs() {
return streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent();
}

View File

@ -0,0 +1,123 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
 * Identifier of a section, derived from the numbering at the start of a headline (e.g. "1.2.3 Title").
 * <p>
 * Besides numerical identifiers there are two special values: {@link #document()}, which is a parent of
 * everything, and {@link #empty()}, which is neither parent nor child of anything.
 */
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class SectionIdentifier {

    // Up to four numbers at the very start of the text, each optionally followed by a single
    // separator (whitespace, '.', ',' or ';'). One optional leading whitespace is tolerated.
    static final Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");

    private enum Format {
        EMPTY,
        NUMERICAL,
        DOCUMENT
    }

    Format format;
    String identifierString;
    List<Integer> identifiers;
    boolean asChild;


    /**
     * Parses the identifier from the beginning of a headline's text.
     *
     * @param headline the headline text; may be {@code null}
     * @return a numerical identifier if the text starts with a number, otherwise {@link #empty()}
     */
    public static SectionIdentifier fromSearchText(String headline) {

        // isBlank() also covers the empty string
        if (headline == null || headline.isBlank()) {
            return SectionIdentifier.empty();
        }

        Matcher numericalIdentifierMatcher = numericalIdentifierPattern.matcher(headline);
        if (numericalIdentifierMatcher.find()) {
            return buildNumericalSectionIdentifier(headline, numericalIdentifierMatcher);
        }
        // more formats here
        return SectionIdentifier.empty();
    }


    /**
     * Creates a copy of the given identifier that is flagged as a child, so that
     * {@link #isParentOf(SectionIdentifier)} on the original returns {@code true} for the copy.
     *
     * @param sectionIdentifier the identifier to copy
     * @return the child-flagged copy
     */
    public static SectionIdentifier asChildOf(SectionIdentifier sectionIdentifier) {

        return new SectionIdentifier(sectionIdentifier.format, sectionIdentifier.toString(), sectionIdentifier.identifiers, true);
    }


    /**
     * @return the identifier of the document root
     */
    public static SectionIdentifier document() {

        return new SectionIdentifier(Format.DOCUMENT, "document", Collections.emptyList(), false);
    }


    /**
     * @return the identifier used when no section numbering is available
     */
    public static SectionIdentifier empty() {

        return new SectionIdentifier(Format.EMPTY, "empty", Collections.emptyList(), false);
    }


    // Turns the capture groups of a successful match into the list of section numbers.
    private static SectionIdentifier buildNumericalSectionIdentifier(String headline, Matcher numericalIdentifierMatcher) {

        String identifierString = headline.substring(numericalIdentifierMatcher.start(), numericalIdentifierMatcher.end());
        List<Integer> identifiers = new LinkedList<>();
        for (int i = 1; i <= 4; i++) {
            String numericalIdentifier = numericalIdentifierMatcher.group(i);
            // A missing group or a literal "0" ends the identifier ("1.0" is treated like "1").
            // NOTE(review): a headline starting with "0" therefore yields a NUMERICAL identifier without
            // any numbers, which isParentOf treats as parent of every numerical identifier - confirm intent.
            if (numericalIdentifier == null || numericalIdentifier.equals("0")) {
                break;
            }
            try {
                // the group is \d+, so it needs no trimming and cannot be empty or blank
                identifiers.add(Integer.parseInt(numericalIdentifier));
            } catch (NumberFormatException e) {
                // The digit run does not fit into an int (e.g. a serial number or a date written
                // without separators); that is not a section number, so do not fail on it.
                return SectionIdentifier.empty();
            }
        }
        return new SectionIdentifier(Format.NUMERICAL, identifierString, List.copyOf(identifiers), false);
    }


    /**
     * Determines if the current section is the parent of the given section.
     * <p>
     * An empty identifier is never a parent, the document identifier always is. Otherwise both identifiers
     * need the same format, and the numbers of this identifier must be a prefix of the other one's numbers.
     * The prefix must be strictly shorter, unless the other identifier was created via
     * {@link #asChildOf(SectionIdentifier)}, in which case equal length is accepted as well.
     *
     * @param sectionIdentifier The section identifier to compare against.
     * @return true if the current section is the parent of the given section, false otherwise.
     */
    public boolean isParentOf(SectionIdentifier sectionIdentifier) {

        if (this.format == Format.EMPTY) {
            return false;
        }
        if (this.format == Format.DOCUMENT) {
            return true;
        }
        if (this.format != sectionIdentifier.format) {
            return false;
        }

        int ownDepth = this.identifiers.size();
        int otherDepth = sectionIdentifier.identifiers.size();
        boolean sameDepthChild = ownDepth == otherDepth && sectionIdentifier.asChild;
        if (ownDepth >= otherDepth && !sameDepthChild) {
            return false;
        }

        // ownDepth <= otherDepth holds here, so the prefix view is always in range
        return this.identifiers.equals(sectionIdentifier.identifiers.subList(0, ownDepth));
    }


    /**
     * Determines if the current section is a child of the given section.
     * The document and empty identifiers are never children.
     *
     * @param sectionIdentifier the potential parent
     * @return true if the given section is a parent of the current section, false otherwise
     */
    public boolean isChildOf(SectionIdentifier sectionIdentifier) {

        if (this.format == Format.DOCUMENT || this.format == Format.EMPTY) {
            return false;
        }
        return sectionIdentifier.isParentOf(this);
    }


    /**
     * @return the matched identifier text for numerical identifiers, otherwise "document" or "empty"
     */
    @Override
    public String toString() {

        return identifierString;
    }

}

View File

@ -115,6 +115,17 @@ public interface SemanticNode {
}
/**
* Returns a SectionIdentifier, such that it acts as a child of the first Headline associated with this SemanticNode.
* The result is built with {@code SectionIdentifier.asChildOf}, i.e. it is a child-flagged copy of the
* headline's identifier rather than the headline's identifier itself.
*
* @return a child-flagged copy of the SectionIdentifier of the Headline returned by {@code getHeadline()}.
*/
default SectionIdentifier getSectionIdentifier() {
return SectionIdentifier.asChildOf(getHeadline().getSectionIdentifier());
}
/**
* Checks if its TreeId has a length greater than zero.
*

View File

@ -9,6 +9,8 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicPositionBlockData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicTextBlockData;
@ -200,6 +202,38 @@ public class AtomicTextBlock implements TextBlock {
}
/**
 * Returns the text inside the given boundary, with the whitespace character that ends a line replaced by
 * {@code '\n'}. If the character directly before a line break is not whitespace it is kept and the
 * line break is moved one position further.
 *
 * @param boundary the range to extract, in the same coordinates as {@link #getBoundary()}
 * @return the extracted text with line breaks, or an empty string if the boundary is empty or not
 * contained in this text block
 */
@Override
public String subSequenceWithLineBreaks(Boundary boundary) {

    if (boundary.length() == 0 || !getBoundary().contains(boundary)) {
        return "";
    }
    CharSequence subSequence = subSequence(boundary);

    // lineBreaks are stored relative to the start of this text block (see getAllLineBreaksInBoundary),
    // while the boundary is absolute. Shift them first, otherwise the filter and the lookup below
    // only work for a block that starts at offset 0.
    int blockStart = getBoundary().start();
    Set<Integer> lbInBoundary = lineBreaks.stream().map(lineBreak -> lineBreak + blockStart).filter(boundary::contains).collect(Collectors.toSet());
    if (boundary.end() == getBoundary().end()) {
        // the end of the text block counts as a line end as well
        lbInBoundary.add(getBoundary().end());
    }

    int boundaryStart = boundary.start();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < subSequence.length(); i++) {
        char character = subSequence.charAt(i);
        // always plus one, due to the linebreaks being an exclusive end index
        int exclusiveEnd = boundaryStart + i + 1;
        if (lbInBoundary.contains(exclusiveEnd)) {
            if (!Character.isWhitespace(character)) {
                // do not swallow a visible character: keep it and retry the line break on the next one
                lbInBoundary.remove(exclusiveEnd);
                lbInBoundary.add(exclusiveEnd + 1);
                sb.append(character);
                continue;
            }
            sb.append("\n");
        } else {
            sb.append(character);
        }
    }
    return sb.toString();
}
private List<Integer> getAllLineBreaksInBoundary(Boundary boundary) {
return getLineBreaks().stream().map(linebreak -> linebreak + this.boundary.start()).filter(boundary::contains).toList();

View File

@ -172,6 +172,34 @@ public class ConcatenatedTextBlock implements TextBlock {
}
/**
 * Builds the text inside the given boundary with line breaks by delegating to the atomic text blocks
 * that the boundary touches: the first and last block contribute only their part of the boundary,
 * every block in between contributes its complete text.
 *
 * @param boundary the range to extract
 * @return the extracted text with line breaks, or an empty string if the boundary is empty or not
 * contained in this text block
 */
@Override
public String subSequenceWithLineBreaks(Boundary boundary) {

    boolean nothingToExtract = boundary.length() == 0 || !getBoundary().contains(boundary);
    if (nothingToExtract) {
        return "";
    }

    List<AtomicTextBlock> touchedBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(boundary);
    int lastIndex = touchedBlocks.size() - 1;
    if (lastIndex == 0) {
        // the boundary lies within a single atomic block
        return touchedBlocks.get(0).subSequenceWithLineBreaks(boundary);
    }

    StringBuilder result = new StringBuilder();

    // head: from the start of the boundary to the end of the first block
    AtomicTextBlock head = touchedBlocks.get(0);
    Boundary headPart = new Boundary(boundary.start(), head.getBoundary().end());
    result.append(head.subSequenceWithLineBreaks(headPart));

    // middle: blocks that are covered completely
    for (int i = 1; i < lastIndex; i++) {
        result.append(touchedBlocks.get(i).searchTextWithLineBreaks());
    }

    // tail: from the start of the last block to the end of the boundary
    AtomicTextBlock tail = touchedBlocks.get(lastIndex);
    Boundary tailPart = new Boundary(tail.getBoundary().start(), boundary.end());
    result.append(tail.subSequenceWithLineBreaks(tailPart));

    return result.toString();
}
private Map<Page, List<Rectangle2D>> mergeEntityPositionsWithSamePageNode(Map<Page, List<Rectangle2D>> map1, Map<Page, List<Rectangle2D>> map2) {
Map<Page, List<Rectangle2D>> mergedMap = new HashMap<>(map1);

View File

@ -42,9 +42,18 @@ public interface TextBlock extends CharSequence {
Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary);
/**
* Returns the text covered by the given boundary with line breaks rendered as {@code '\n'}.
* The implementations in AtomicTextBlock and ConcatenatedTextBlock return an empty string when the boundary
* is empty or not contained in the text block.
*
* @param boundary the range to extract
* @return the extracted text including line breaks
*/
String subSequenceWithLineBreaks(Boundary boundary);
int numberOfLines();
/**
* Returns the complete text of this text block with line breaks, i.e. {@code subSequenceWithLineBreaks}
* applied to the text block's own boundary.
*
* @return the whole text including line breaks
*/
default String searchTextWithLineBreaks() {
return subSequenceWithLineBreaks(getBoundary());
}
default int indexOf(String searchTerm) {
return indexOf(searchTerm, getBoundary().start());

View File

@ -23,6 +23,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
@ -90,7 +91,9 @@ public class EntityCreationService {
return entityBoundaries.stream()
.map(boundary -> boundary.trim(node.getTextBlock()))
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
.map(boundary -> byBoundary(boundary, type, entityType, node));
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
@ -129,7 +132,9 @@ public class EntityCreationService {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
.stream()
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
.map(bounds -> byBoundary(bounds, type, entityType, node));
.map(bounds -> byBoundary(bounds, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
@ -141,7 +146,9 @@ public class EntityCreationService {
.stream()
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
.map(boundary -> byBoundary(boundary, type, entityType, node));
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
@ -152,19 +159,24 @@ public class EntityCreationService {
.stream()
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
.map(boundary -> byBoundary(boundary, type, entityType, node));
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
public Stream<RedactionEntity> byRegexWithLinebreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegexWithLinebreaks(regexPattern, type, entityType, 0, node);
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node);
}
public Stream<RedactionEntity> byRegexWithLinebreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegexWithLinebreaksIgnoreCase(regexPattern, type, entityType, 0, node);
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node);
}
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) {
return byRegex(regexPattern, type, entityType, 0, node);
@ -177,20 +189,33 @@ public class EntityCreationService {
}
public Stream<RedactionEntity> byRegexWithLinebreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByRegexWithLinebreaks(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock())
.stream()
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
public Stream<RedactionEntity> byRegexWithLinebreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByRegexWithLinebreaksIgnoreCase(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock())
.stream()
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock())
.stream()
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
@ -198,13 +223,25 @@ public class EntityCreationService {
return RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexPattern, group, node.getTextBlock())
.stream()
.map(boundary -> byBoundary(boundary, type, entityType, node));
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
public Stream<RedactionEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) {
return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock())
.stream()
.map(boundary -> byBoundary(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
/**
 * Creates entities for all sub nodes of type {@code NodeType.PARAGRAPH} below the given node by
 * calling {@code bySemanticNode} for each of them.
 *
 * @param node       the node whose paragraph sub nodes are processed
 * @param type       the type passed on to {@code bySemanticNode}
 * @param entityType the entity type passed on to {@code bySemanticNode}
 * @return a stream of the entities that were created; paragraphs for which {@code bySemanticNode}
 *         returned an empty Optional are left out
 */
public Stream<RedactionEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {

    return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
            .map(paragraph -> bySemanticNode(paragraph, type, entityType))
            .flatMap(Optional::stream);
}
@ -218,18 +255,18 @@ public class EntityCreationService {
if (!isValidEntityBoundary(node.getTextBlock(), boundary)) {
return Optional.empty();
}
return Optional.of(byBoundary(boundary, type, entityType, node));
return byBoundary(boundary, type, entityType, node);
}
public RedactionEntity byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
public Optional<RedactionEntity> byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
int expandedStart = getExpandedStartByRegex(entity, regexPattern);
return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
}
public RedactionEntity bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
public Optional<RedactionEntity> bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
int expandedEnd = getExpandedEndByRegex(entity, regexPattern);
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
@ -246,7 +283,32 @@ public class EntityCreationService {
}
public RedactionEntity byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
/**
* Creates a redaction entity based on the given boundary, type, entity type, and semantic node.
* The boundary is trimmed against the text block of the node before the entity is created.
* If the node already contains an equal redaction entity, then an empty Optional is returned
* and nothing is added to the graph.
*
* @param boundary The boundary of the redaction entity. Must be contained in the boundary of the node.
* @param type The type of the redaction entity.
* @param entityType The entity type of the redaction entity.
* @param node The semantic node to associate with the redaction entity.
* @return An Optional containing the redaction entity, or an empty Optional if the entity already exists.
* @throws IllegalArgumentException if the boundary is not contained in the boundary of the node.
*/
public Optional<RedactionEntity> byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
// The containment check runs on the boundary as passed in, before any trimming.
if (!node.getBoundary().contains(boundary)) {
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", boundary, node.getBoundary(), node));
}
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
// Duplicate detection relies on the equality of RedactionEntity against the entities already attached to the node.
if (node.getEntities().contains(entity)) {
return Optional.empty();
}
addEntityToGraph(entity, node);
return Optional.of(entity);
}
public RedactionEntity forceByBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
@ -281,19 +343,15 @@ public class EntityCreationService {
}
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
public Optional<RedactionEntity> byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
RedactionEntity entity = byBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode);
entity.addEngine(Engine.NER);
return entity;
// Tag the created entity with the NER engine via Optional.map instead of Stream.peek: peek is
// meant for debugging and the stream implementation is allowed to skip its action.
return byBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode).map(entity -> {
    entity.addEngine(Engine.NER);
    return entity;
});
}
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
public Optional<RedactionEntity> byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
RedactionEntity entity = byBoundary(nerEntity.boundary(), type, entityType, semanticNode);
entity.addEngine(Engine.NER);
return entity;
// Tag the created entity with the NER engine via Optional.map instead of Stream.peek: peek is
// meant for debugging and the stream implementation is allowed to skip its action.
return byBoundary(nerEntity.boundary(), type, entityType, semanticNode).map(entity -> {
    entity.addEngine(Engine.NER);
    return entity;
});
}

View File

@ -116,7 +116,7 @@ public class RectangleTransformations {
@Override
public BiConsumer<BBox, Rectangle2D> accumulator() {
return (bb, rect) -> bb.addRectangle(rect.getMinX(), rect.getMinY(), rect.getMaxX(), rect.getMaxY());
return BBox::addRectangle;
}
@ -154,7 +154,12 @@ public class RectangleTransformations {
Double upperRightY;
public void addRectangle(double lowerLeftX, double lowerLeftY, double upperRightX, double upperRightY) {
public void addRectangle(Rectangle2D rectangle2D) {
double lowerLeftX = Math.min(rectangle2D.getMinX(), rectangle2D.getMaxX());
double lowerLeftY = Math.min(rectangle2D.getMinY(), rectangle2D.getMaxY());
double upperRightX = Math.max(rectangle2D.getMinX(), rectangle2D.getMaxX());
double upperRightY = Math.max(rectangle2D.getMinY(), rectangle2D.getMaxY());
if (this.lowerLeftX == null) {
this.lowerLeftX = lowerLeftX;

View File

@ -102,17 +102,17 @@ public class RedactionSearchUtility {
}
public static List<Boundary> findBoundariesByRegexWithLinebreaks(String regexPattern, int group, TextBlock textBlock) {
public static List<Boundary> findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false);
return getBoundariesByPatternWithLinebreaks(textBlock, group, pattern);
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
}
public static List<Boundary> findBoundariesByRegexWithLinebreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
public static List<Boundary> findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true);
return getBoundariesByPatternWithLinebreaks(textBlock, group, pattern);
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
}
@ -134,21 +134,10 @@ public class RedactionSearchUtility {
}
private static List<Boundary> getBoundariesByPatternWithLinebreaks(TextBlock textBlock, int group, Pattern pattern) {
private static List<Boundary> getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) {
StringBuilder stringBuilder = new StringBuilder();
textBlock.getAtomicTextBlocks().forEach(at -> {
if (at.numberOfLines() > 1) {
for (int i = 0; i < at.numberOfLines(); i++) {
stringBuilder.append(at.getLine(i));
stringBuilder.setCharAt(stringBuilder.length() - 1, '\n');
}
} else {
stringBuilder.append(at.getSearchText()).setCharAt(stringBuilder.length() - 1, '\n');
}
});
Matcher matcher = pattern.matcher(stringBuilder.toString());
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
List<Boundary> boundaries = new LinkedList<>();
while (matcher.find()) {
boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start()));

View File

@ -84,6 +84,7 @@ public class AnalyzeService {
@Timed("redactmanager_analyzeDocumentStructure")
public AnalyzeResult analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
log.info("Starting Structure Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
long startTime = System.currentTimeMillis();
ClassificationDocument classifiedDoc;
@ -92,25 +93,29 @@ public class AnalyzeService {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
FileType.ORIGIN));
log.info("Loaded PDF for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
Map<Integer, List<ClassifiedImage>> pdfImages = null;
if (redactionServiceSettings.isEnableImageClassification()) {
pdfImages = imageServiceResponseAdapter.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded image service response for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
}
classifiedDoc = pdfSegmentationService.parseDocument(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), storedObjectStream, pdfImages);
log.info("Parsed document for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
} catch (Exception e) {
throw new RedactionException(e);
}
Document document = DocumentGraphFactory.buildDocumentGraph(classifiedDoc);
log.info("Built Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
SectionGrid sectionGrid = sectionGridCreatorService.createSectionGrid(document);
log.info("Built section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.info("Store document graph, text, simplified text, and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, DocumentData.fromDocument(document));
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SIMPLIFIED_TEXT, toSimplifiedText(document));
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, sectionGrid);
log.info("Stored document graph, text, simplified text, and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
return AnalyzeResult.builder()
.dossierId(analyzeRequest.getDossierId())
@ -125,21 +130,27 @@ public class AnalyzeService {
@Timed("redactmanager_analyze")
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
log.info("Starting Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
long startTime = System.currentTimeMillis();
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document);
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
log.info("Updated Dictionary for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
log.info("Updated Rules to Version {} for file {} in dossier {}", rulesVersion, analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.debug("Starting Dictionary Search");
long dictSearchStart = System.currentTimeMillis();
entityRedactionService.addDictionaryEntities(dictionary, document);
log.debug("Finished Dictionary Search in {} ms", System.currentTimeMillis() - dictSearchStart);
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
Set<FileAttribute> addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, kieContainer, analyzeRequest, nerEntities);
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId());
@ -168,10 +179,12 @@ public class AnalyzeService {
@SneakyThrows
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
log.info("Starting Reanalysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
long startTime = System.currentTimeMillis();
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded previous redaction log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
// not yet ready for reanalysis
if (previousRedactionLog == null || document == null || document.getNumberOfPages() == 0) {
return analyze(analyzeRequest);
@ -183,6 +196,7 @@ public class AnalyzeService {
Set<Integer> sectionsToReanalyseIds = getSectionsToReanalyseIds(analyzeRequest, previousRedactionLog, document, dictionaryIncrement);
List<SemanticNode> sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds);
log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
if (sectionsToReAnalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest,
@ -195,15 +209,16 @@ public class AnalyzeService {
}
NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds);
log.info("Reanalyze {} sections with {} Ner Entities", sectionsToReAnalyse.size(), nerEntities.getNerEntityList().size());
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
log.info("Updated Rules for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
sectionsToReAnalyse.forEach(node -> entityRedactionService.addDictionaryEntities(dictionary, node));
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
long ruleStart = System.currentTimeMillis();
Set<FileAttribute> addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, sectionsToReAnalyse, kieContainer, analyzeRequest, nerEntities);
log.info("Rule execution took {} ms", System.currentTimeMillis() - ruleStart);
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
List<RedactionLogEntry> newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId());
@ -244,7 +259,10 @@ public class AnalyzeService {
analyzeRequest.getFileId(),
redactionLog,
analyzeRequest.getAnalysisNumber());
log.info("Created Redaction Log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange.getRedactionLog());
log.info("Stored Redaction Log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
long duration = System.currentTimeMillis() - startTime;

View File

@ -252,11 +252,11 @@ public class DictionaryService {
falsePositives.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
}
log.info("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
log.debug("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
entries.size(),
falsePositives.size(),
falseRecommendations.size(),
type.getType());
typeId);
return new DictionaryEntries(entries, falsePositives, falseRecommendations);
}
@ -304,7 +304,8 @@ public class DictionaryService {
if (dossierDictionaryExists(dossierId)) {
var dossierRepresentation = getDossierDictionary(dossierId);
var dossierDictionaries = dossierRepresentation.getDictionary();
mergedDictionaries = convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries), convertDictionaryModel(dossierDictionaries)));
mergedDictionaries = convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries),
convertDictionaryModel(dossierDictionaries)));
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
} else {
mergedDictionaries = new ArrayList<>();
@ -367,23 +368,37 @@ public class DictionaryService {
}
}
private List<CommonsDictionaryModel> convertDictionaryModel(List<DictionaryModel> dictionaries) {
return dictionaries.stream().map(d -> CommonsDictionaryModel.builder()
.type(d.getType())
.rank(d.getRank())
.color(d.getColor())
.caseInsensitive(d.isCaseInsensitive())
.hint(d.isHint())
.isDossierDictionary(d.isDossierDictionary())
.entries(d.getEntries())
.falsePositives(d.getFalsePositives())
.falseRecommendations(d.getFalseRecommendations())
.build()).collect(Collectors.toList());
return dictionaries.stream()
.map(d -> CommonsDictionaryModel.builder()
.type(d.getType())
.rank(d.getRank())
.color(d.getColor())
.caseInsensitive(d.isCaseInsensitive())
.hint(d.isHint())
.isDossierDictionary(d.isDossierDictionary())
.entries(d.getEntries())
.falsePositives(d.getFalsePositives())
.falseRecommendations(d.getFalseRecommendations())
.build())
.collect(Collectors.toList());
}
private List<DictionaryModel> convertCommonsDictionaryModel(List<CommonsDictionaryModel> commonsDictionaries) {
return commonsDictionaries.stream().map(cd ->
new DictionaryModel(cd.getType(), cd.getRank(), cd.getColor(), cd.isCaseInsensitive(), cd.isHint(), cd.getEntries(), cd.getFalsePositives(), cd.getFalseRecommendations(), cd.isDossierDictionary()))
return commonsDictionaries.stream()
.map(cd -> new DictionaryModel(cd.getType(),
cd.getRank(),
cd.getColor(),
cd.isCaseInsensitive(),
cd.isHint(),
cd.getEntries(),
cd.getFalsePositives(),
cd.getFalseRecommendations(),
cd.isDossierDictionary()))
.collect(Collectors.toList());
}

View File

@ -23,7 +23,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribu
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
import com.knecon.fforesight.tenantcommons.TenantContext;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
@ -91,6 +90,7 @@ public class DroolsExecutionService {
kieSession.setGlobal("dictionary", dictionary);
kieSession.setGlobal("nerEntitiesAdapter", nerEntitiesAdapter);
kieSession.insert(document);
document.getEntities().forEach(kieSession::insert);
sectionsToAnalyze.forEach(kieSession::insert);
sectionsToAnalyze.stream().flatMap(SemanticNode::streamAllSubNodes).forEach(kieSession::insert);

View File

@ -75,7 +75,7 @@ public class ManualRedactionSurroundingTextService {
Set<RedactionEntity> entities = RedactionSearchUtility.findBoundariesByString(value, node.getTextBlock())
.stream()
.map(boundary -> entityCreationService.byBoundary(boundary, "searchHelper", EntityType.RECOMMENDATION, node))
.map(boundary -> entityCreationService.forceByBoundary(boundary, "searchHelper", EntityType.RECOMMENDATION, node))
.collect(Collectors.toSet());
RedactionEntity correctEntity = getEntityOnCorrectPosition(entities, toFindPositions);

View File

@ -35,8 +35,9 @@ public class RedactionLogCreatorService {
document.getEntities()
.stream()
.filter(RedactionLogCreatorService::isEntityOrRecommendationType)
.filter(entity -> !entity.isRemoved())
.forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, processedIds, dossierTemplateId)));
document.streamAllImages().forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId)));
document.streamAllImages().filter(image -> !image.isRemoved()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId)));
return entries;
}
@ -80,17 +81,14 @@ public class RedactionLogCreatorService {
private RedactionLogEntry createRedactionLogEntry(RedactionEntity entity, String dossierTemplateId) {
Set<String> referenceIds = new HashSet<>();
entity.getReferences()
.stream()
.filter(redactionEntity -> !redactionEntity.isRemoved() && !redactionEntity.isIgnored())
.forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId())));
entity.getReferences().stream().filter(RedactionEntity::isActive).forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId())));
int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 0 : entity.getDeepestFullyContainingNode().getTreeId().get(0);
return RedactionLogEntry.builder()
.color(getColor(entity.getType(), dossierTemplateId, entity.isApplied()))
.reason(entity.getMatchedRule().reason())
.legalBasis(entity.getMatchedRule().legalBasis())
.value(entity.getValue())
.reason(entity.getMatchedRule().getReason())
.legalBasis(entity.getMatchedRule().getLegalBasis())
.value(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())
.type(entity.getType())
.redacted(entity.isApplied())
.isHint(isHint(entity.getType(), dossierTemplateId))
@ -98,7 +96,7 @@ public class RedactionLogCreatorService {
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(entity.getDeepestFullyContainingNode().toString())
.sectionNumber(sectionNumber)
.matchedRule(entity.getMatchedRule().ruleIdentifier().toString())
.matchedRule(entity.getMatchedRule().getRuleIdentifier().toString())
.isDictionaryEntry(entity.isDictionaryEntry())
.textAfter(entity.getTextAfter())
.textBefore(entity.getTextBefore())
@ -120,9 +118,9 @@ public class RedactionLogCreatorService {
.isImage(true)
.type(imageType)
.redacted(image.isApplied())
.reason(image.getMatchedRule().reason())
.legalBasis(image.getMatchedRule().legalBasis())
.matchedRule(image.getMatchedRule().ruleIdentifier().toString())
.reason(image.getMatchedRule().getReason())
.legalBasis(image.getMatchedRule().getLegalBasis())
.matchedRule(image.getMatchedRule().getRuleIdentifier().toString())
.isHint(dictionaryService.isHint(image.getImageType().toString(), dossierTemplateId))
.isDictionaryEntry(false)
.isRecommendation(false)

View File

@ -55,7 +55,7 @@ class SectionFinderService {
}
});
log.info("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start);
log.debug("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start);
return sectionsToReanalyse;
}

View File

@ -9,9 +9,9 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public final class Patterns {
public static Map<String, Pattern> patternCache = new HashMap<>();
public static final Map<String, Pattern> patternCache = new HashMap<>();
public static Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile(
public static final Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile(
"(((((di)|(van)) )|[A-Z])?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.){1,3})|(((((di)|(van)) )|[A-Z])?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} ){1,3})");

View File

@ -0,0 +1,163 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.Mockito.when;
import static org.wildfly.common.Assert.assertTrue;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.Set;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;
/**
 * End-to-end acceptance test: analyses a crafted sanitisation document with the rules from
 * {@code drools/acceptance_rules.drl}, removes a {@code published_information} annotation through a manual
 * redaction and checks the {@code CBI_author} entry of the same section after reanalysis.
 */
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
// NOTE(review): this imports the configuration nested in RedactionIntegrationTest, not the identically named
// class declared below - confirm which of the two is meant to be active.
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {

    private static final String RULES = loadFromClassPath("drools/acceptance_rules.drl");

    /**
     * Test configuration that excludes the RabbitMQ and storage auto-configuration and provides a
     * {@link FileSystemBackedStorageService} as the primary {@link StorageService}.
     */
    @Configuration
    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
    @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
    public static class RedactionIntegrationTestConfiguration {

        @Bean
        @Primary
        public StorageService inmemoryStorage() {

            return new FileSystemBackedStorageService();
        }

    }


    /**
     * Sets the tenant and stubs the rules and dictionary clients with the acceptance rules and the test
     * dictionaries before every test.
     */
    @BeforeEach
    public void stubClients() {

        TenantContext.setTenantId("redaction");

        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));

        loadDictionaryForTest();
        loadTypeForTest();
        loadNerForTest();

        // The version stub is registered once; a second identical stubbing was redundant.
        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
                .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
                .type(DOSSIER_REDACTIONS_INDICATOR)
                .dossierTemplateId(TEST_DOSSIER_ID)
                .hexColor("#ffe187")
                .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .build()));

        mockDictionaryCalls(null);

        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
    }


    /**
     * Analyses the document, removes the "Oxford University Press" {@code published_information} entry via an
     * approved {@link IdRemoval}, reanalyses, and expects the "Asya Lyon" {@code CBI_author} entry in the same
     * section to be redacted. The annotated PDF is written to the temporary directory.
     *
     * @throws IOException if the annotated document cannot be written
     */
    @Test
    public void acceptanceTests() throws IOException {

        AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
        System.out.println("Start Full integration test");
        analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
        System.out.println("Finished structure analysis");
        // The returned AnalyzeResult is not inspected; the redaction log is read from storage instead.
        analyzeService.analyze(request);
        System.out.println("Finished analysis");

        var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
        var publishedInformationEntry1 = redactionLog.getRedactionLogEntry()
                .stream()
                .filter(entry -> entry.getType().equals("published_information"))
                .filter(entry -> entry.getValue().equals("Oxford University Press"))
                .findFirst()
                .orElseThrow();
        // orElseThrow() doubles as a check that the author entry exists before the removal.
        var asyaLyon1 = redactionLog.getRedactionLogEntry()
                .stream()
                .filter(entry -> entry.getType().equals("CBI_author"))
                .filter(entry -> entry.getValue().equals("Asya Lyon"))
                .filter(entry -> entry.getSectionNumber() == publishedInformationEntry1.getSectionNumber())
                .findFirst()
                .orElseThrow();
        // NOTE(review): disabled assertion kept from the original - confirm the expected state and re-enable.
        // assertFalse(asyaLyon1.isRedacted());

        var idRemoval = IdRemoval.builder()
                .requestDate(OffsetDateTime.now())
                .annotationId(publishedInformationEntry1.getId())
                .status(AnnotationStatus.APPROVED)
                .fileId(TEST_FILE_ID)
                .build();
        var manualRedactions = ManualRedactions.builder().idsToRemove(Set.of(idRemoval)).build();
        request.setManualRedactions(manualRedactions);

        analyzeService.reanalyze(request);

        redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
        var publishedInformationEntry2 = redactionLog.getRedactionLogEntry()
                .stream()
                .filter(entry -> entry.getType().equals("published_information"))
                .filter(entry -> entry.getValue().equals("Oxford University Press"))
                .findFirst()
                .orElseThrow();
        var asyaLyon2 = redactionLog.getRedactionLogEntry()
                .stream()
                .filter(entry -> entry.getType().equals("CBI_author"))
                .filter(entry -> entry.getValue().equals("Asya Lyon"))
                .filter(entry -> entry.getSectionNumber() == publishedInformationEntry2.getSectionNumber())
                .findFirst()
                .orElseThrow();

        // NOTE(review): assertTrue is statically imported from org.wildfly.common.Assert in this file, not from
        // JUnit - confirm that is intended.
        assertTrue(asyaLyon2.isRedacted());

        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
        String outputFileName = OsUtils.getTemporaryDirectory() + "/AcceptanceTest.pdf";
        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
            fileOutputStream.write(annotateResponse.getDocument());
        }
    }

}

View File

@ -20,7 +20,7 @@ public class RedactionEntityTest {
entity.skip("CBI.3.0", "");
entity.skip("CBI.4.1", "");
entity.skip("CBI.4.0", "");
assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.4.1");
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.4.1");
assertThat(entity.getMatchedRuleUnit()).isEqualTo(4);
}
@ -34,7 +34,7 @@ public class RedactionEntityTest {
entity.skip("CBI.3.0", "");
entity.skip("CBI.4.1", "");
entity.skip("CBI.4.0", "");
assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("MAN.2.0");
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("MAN.2.0");
assertThat(entity.getMatchedRuleUnit()).isEqualTo(2);
}
@ -59,7 +59,7 @@ public class RedactionEntityTest {
entity.apply("CBI.0.0", "", "");
});
entity.skip("CBI.2.0", "");
assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.2.0");
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.2.0");
assertThat(entity.getMatchedRuleUnit()).isEqualTo(2);
}

View File

@ -44,6 +44,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
}
/**
 * Creating an entity twice for the same boundary, type and entity type must succeed only the first time and
 * leave exactly one entity on the document.
 */
@Test
public void assertSameEntitiesCantBeCreatedTwice() {

    Document document = buildGraph("files/new/crafted document.pdf");

    var firstAttempt = entityCreationService.byBoundary(new Boundary(0, 10), "CBI_author", EntityType.ENTITY, document);
    assertTrue(firstAttempt.isPresent());

    var secondAttempt = entityCreationService.byBoundary(new Boundary(0, 10), "CBI_author", EntityType.ENTITY, document);
    assertTrue(secondAttempt.isEmpty());

    assertEquals(1, document.getEntities().size());
}
private RedactionEntity createAndInsertEntity(Document document, String searchTerm) {
int start = document.getTextBlock().indexOf(searchTerm);

View File

@ -138,7 +138,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
assertFalse(entity.getIntersectingNodes().isEmpty());
assertEquals(1, entity.getPages().size());
assertEquals("David Ksenia", entity.getValue());
assertEquals("Something", entity.getMatchedRule().legalBasis());
assertEquals("Something", entity.getMatchedRule().getLegalBasis());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertFalse(entity.isRemoved());
assertTrue(entity.isSkipRemoveEntitiesContainedInLarger());

View File

@ -0,0 +1,58 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.sectionidentifiers;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SectionIdentifier;
/**
 * Checks the parent/child relations between {@link SectionIdentifier}s parsed from headline texts.
 */
class SectionIdentifierTest {

    /**
     * Parses a set of headlines (each local is named after the numbering prefix of its text) and asserts which
     * identifiers are reported as parents or children of each other.
     */
    @Test
    public void testParentOf() {

        var one = SectionIdentifier.fromSearchText("1 Did you ever hear the tragedy of Darth Plagueis The Wise?");
        var oneZero = SectionIdentifier.fromSearchText("1.0 I thought not. Its not a story the Jedi would tell you.");
        var oneOne = SectionIdentifier.fromSearchText("1.1 Its a Sith legend. Darth Plagueis was a Dark Lord of the Sith, ");
        var oneTwoThree = SectionIdentifier.fromSearchText("1.2.3 so powerful and so wise he could use the Force to influence the midichlorians to create life…");
        var oneTwoThreeFour = SectionIdentifier.fromSearchText("1.2.3.4 He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.");
        var oneTwoThreeFourFive = SectionIdentifier.fromSearchText("1.2.3.4.5 The dark side of the Force is a pathway to many abilities some consider to be unnatural.");
        var twoZero = SectionIdentifier.fromSearchText("2.0 He became so powerful…");
        var tenThousandZero = SectionIdentifier.fromSearchText("10000.0 the only thing he was afraid of was losing his power,");
        var letterAZero = SectionIdentifier.fromSearchText("A.0 which eventually, of course, he did.");
        var unnumbered = SectionIdentifier.fromSearchText("Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.");
        var twoOneTwo = SectionIdentifier.fromSearchText("2.1.2 Ironic.");
        var twoDotNoSpace = SectionIdentifier.fromSearchText("2.He could save others from death,");
        var twoDotLeadingSpace = SectionIdentifier.fromSearchText(" 2. but not himself.");

        // A paragraph created as child of a headline.
        var paragraphUnderOne = SectionIdentifier.asChildOf(one);
        assertTrue(paragraphUnderOne.isChildOf(one));
        assertTrue(one.isParentOf(paragraphUnderOne));
        assertFalse(paragraphUnderOne.isParentOf(one));

        // "1" against the identifiers starting with 1.
        assertFalse(one.isParentOf(oneZero));
        assertTrue(one.isParentOf(oneOne));
        assertTrue(one.isParentOf(oneTwoThree));
        assertTrue(one.isParentOf(oneTwoThreeFour));
        assertTrue(one.isParentOf(oneTwoThreeFourFive));

        // "1.0" and the deeper identifiers.
        assertTrue(oneZero.isParentOf(oneOne));
        assertFalse(oneZero.isParentOf(oneZero));
        assertTrue(oneTwoThree.isParentOf(oneTwoThreeFour));
        assertFalse(oneTwoThreeFour.isParentOf(oneTwoThreeFourFive));
        assertFalse(oneOne.isParentOf(oneTwoThree));
        assertFalse(oneOne.isParentOf(oneTwoThreeFour));
        assertTrue(oneZero.isParentOf(oneTwoThree));
        assertTrue(oneZero.isParentOf(oneTwoThreeFour));

        // Different leading numbers.
        assertFalse(oneZero.isParentOf(twoZero));
        assertFalse(oneZero.isParentOf(tenThousandZero));

        // Headline starting with a letter, and headline without any numbering.
        assertFalse(letterAZero.isParentOf(oneZero));
        assertFalse(letterAZero.isParentOf(oneOne));
        assertFalse(letterAZero.isParentOf(oneTwoThree));
        assertFalse(letterAZero.isParentOf(oneTwoThreeFour));
        assertFalse(unnumbered.isParentOf(unnumbered));

        // "2." directly followed by text, and "2." preceded by a space.
        assertTrue(twoOneTwo.isChildOf(twoDotNoSpace));
        assertTrue(twoOneTwo.isChildOf(twoDotLeadingSpace));
    }

}

View File

@ -0,0 +1,93 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.List;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
/**
 * Tests {@code searchTextWithLineBreaks()} on single {@link AtomicTextBlock}s and on
 * {@link ConcatenatedTextBlock}s built from them.
 */
class AtomicTextBlockTest {

    @Test
    void subSequenceWithLineBreaks1() {

        var block = blockAt(0, "1234 6789 ", List.of(5, 7));

        assertEquals("1234\n6789\n", block.searchTextWithLineBreaks());
    }


    @Test
    void subSequenceWithLineBreaks2() {

        var block = blockAt(0, "1234 6789 ", List.of(5, 7, 8, 9));

        assertEquals("1234\n6789\n", block.searchTextWithLineBreaks());
    }


    @Test
    void subSequenceWithLineBreaks3() {

        var block = blockAt(0, "1234 6789 1234 ", List.of(5, 7));

        assertEquals("1234\n6789\n1234\n", block.searchTextWithLineBreaks());
    }


    @Test
    void subSequenceWithLineBreaks4() {

        var block = blockAt(0, "1234 6789 1234 ", List.of(5, 7));
        var concatenated = new ConcatenatedTextBlock(List.of(block));

        assertEquals("1234\n6789\n1234\n", concatenated.searchTextWithLineBreaks());
    }


    @Test
    void subSequenceWithLineBreaks5() {

        String firstText = "1234 6789 ";
        var first = blockAt(0, firstText, List.of(5, 7));
        // The second block starts where the first one ends.
        var second = blockAt(firstText.length(), "1234 ", List.of());
        var concatenated = new ConcatenatedTextBlock(List.of(first, second));

        assertEquals("1234\n6789\n1234\n", concatenated.searchTextWithLineBreaks());
    }


    @Test
    void subSequenceWithLineBreaks6() {

        String firstText = "1234 6789 ";
        var first = blockAt(0, firstText, List.of(5, 7));
        var second = blockAt(firstText.length(), "1234 ", List.of());
        // The third block continues at the end of the second; its line break is given relative to that offset.
        int thirdStart = second.getBoundary().end();
        var third = blockAt(thirdStart, "1234 8475678900 ", List.of(thirdStart + 6));
        var concatenated = new ConcatenatedTextBlock(List.of(first, second, third));

        assertEquals("1234\n6789\n1234\n1234 8475678900\n", concatenated.searchTextWithLineBreaks());
    }


    /**
     * Builds an {@link AtomicTextBlock} whose boundary spans {@code [start, start + text.length())}.
     */
    private static AtomicTextBlock blockAt(int start, String text, List<Integer> lineBreaks) {

        return AtomicTextBlock.builder().searchText(text).lineBreaks(lineBreaks).boundary(new Boundary(start, start + text.length())).build();
    }

}

View File

@ -0,0 +1,90 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.awt.geom.Rectangle2D;
import java.util.List;
import org.junit.jupiter.api.Test;
/**
 * Tests {@code RectangleTransformations.rectangle2DBBox} with rectangles of positive and negative
 * width/height, in different list orders.
 */
class RectangleTransformationsTest {

    /** Two rectangles with positive extents. */
    @Test
    public void testRectangle2DBBox() {

        var bbox = RectangleTransformations.rectangle2DBBox(List.of(new Rectangle2D.Double(0, 0, 1, 1), new Rectangle2D.Double(1, 1, 1, 1)));

        assertEquals(0, bbox.getX());
        assertEquals(0, bbox.getY());
        assertEquals(2, bbox.getWidth());
        assertEquals(2, bbox.getHeight());
    }


    /** First rectangle has negative width and height. */
    @Test
    public void testRectangle2DBBox2() {

        var bbox = RectangleTransformations.rectangle2DBBox(List.of(new Rectangle2D.Double(0, 0, -1, -1), new Rectangle2D.Double(1, 1, 1, 1)));

        assertEquals(-1, bbox.getX());
        assertEquals(-1, bbox.getY());
        assertEquals(3, bbox.getWidth());
        assertEquals(3, bbox.getHeight());
    }


    /** Same rectangles as the previous case, passed in reverse order. */
    @Test
    public void testRectangle2DBBox3() {

        var bbox = RectangleTransformations.rectangle2DBBox(List.of(new Rectangle2D.Double(1, 1, 1, 1), new Rectangle2D.Double(0, 0, -1, -1)));

        assertEquals(-1, bbox.getX());
        assertEquals(-1, bbox.getY());
        assertEquals(3, bbox.getWidth());
        assertEquals(3, bbox.getHeight());
    }


    /** One rectangle with negative height only, one with negative width and height. */
    @Test
    public void testRectangle2DBBox4() {

        var bbox = RectangleTransformations.rectangle2DBBox(List.of(new Rectangle2D.Double(0, 2, 1, -1), new Rectangle2D.Double(2, 0, -1, -1)));

        assertEquals(0, bbox.getX());
        assertEquals(-1, bbox.getY());
        assertEquals(2, bbox.getWidth());
        assertEquals(3, bbox.getHeight());
    }


    /** Three rectangles with mixed signs of width and height. */
    @Test
    public void testRectangle2DBBox5() {

        var bbox = RectangleTransformations.rectangle2DBBox(List.of(new Rectangle2D.Double(0, 2, 1, -1),
                new Rectangle2D.Double(2, 0, -1, -1),
                new Rectangle2D.Double(3, 2, 1, 1)));

        assertEquals(0, bbox.getX());
        assertEquals(-1, bbox.getY());
        assertEquals(4, bbox.getWidth());
        assertEquals(4, bbox.getHeight());
    }


    /** Both rectangles have negative width and height. */
    @Test
    public void testRectangle2DBBox6() {

        var bbox = RectangleTransformations.rectangle2DBBox(List.of(new Rectangle2D.Double(0, 0, -1, -1), new Rectangle2D.Double(-1, -1, -1, -1)));

        assertEquals(-2, bbox.getX());
        assertEquals(-2, bbox.getY());
        assertEquals(2, bbox.getWidth());
        assertEquals(2, bbox.getHeight());
    }

}

View File

@ -8,6 +8,7 @@ import java.awt.geom.Rectangle2D;
import java.io.File;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -66,6 +67,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
.filter(e -> !e.type().equals("CBI_author"));
List<RedactionEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
redactionEntities.stream()
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))
@ -98,6 +101,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
log.info("Combined to CBI_address");
List<RedactionEntity> cbiAddressEntities = nerEntityBoundaries.stream()
.map(b -> entityCreationService.byBoundary(b, "CBI_address", EntityType.RECOMMENDATION, document))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
assertFalse(cbiAddressEntities.isEmpty());
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getBoundary().start() < entity.getBoundary().end()));
@ -108,6 +113,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
.getNerEntityList()
.stream()
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
Stream.concat(cbiAddressEntities.stream(), validatedEntities.stream())
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))

View File

@ -143,6 +143,7 @@ Allen T.
Allen T.R.
Almeida A
Almeida A.
Asya Lyon
Almeida A.A.
Almeida A.A.|Vassilieff I.
Almeida|A.A.|Vassilieff|I.

View File

@ -87,3 +87,4 @@ Toxicol Sci.
Toxicol Sci. 1
Test Ignored Hint Published Information
Workshop
Oxford University Press

View File

@ -0,0 +1,772 @@
package drools
import static java.lang.String.format;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch;
import java.util.List;
import java.util.LinkedList;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.Collection;
import java.util.stream.Stream;
import java.util.Optional;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global NerEntitiesAdapter nerEntitiesAdapter
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
// Query that matches every FileAttribute fact in working memory and binds it to $fileAttribute.
query "getFileAttributes"
$fileAttribute: FileAttribute()
end
//------------------------------------ Syngenta specific rules ------------------------------------
// Rule unit: SYN.1
rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
    when
        // Only sections containing "CT" or "BL" are searched with the regular expression.
        $section: Section(containsString("CT") || containsString("BL"))
    then
        /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
        String laboratoryPattern = "((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))";
        // Every match becomes a CBI_address recommendation that is skipped under SYN.1.0 and inserted as a fact.
        entityCreationService.byRegexIgnoreCase(laboratoryPattern, "CBI_address", EntityType.RECOMMENDATION, $section)
                .forEach(laboratory -> {
                    laboratory.skip("SYN.1.0", "");
                    laboratory.addEngine(Engine.RULE);
                    insert(laboratory);
                });
end
//------------------------------------ CBI rules ------------------------------------
// Rule unit: CBI.0
// Fires for each CBI_author RedactionEntity satisfying the dictionaryEntry constraint when no
// "Vertebrate Study" FileAttribute has the value "yes" (compared in lower case).
// Applies the entity under CBI.0.0 with legal basis Article 39(e)(3).
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
then
$entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Fires for each CBI_author RedactionEntity satisfying the dictionaryEntry constraint when a
// "Vertebrate Study" FileAttribute has the value "yes" (compared in lower case).
// Applies the entity under CBI.0.1 with legal basis Article 39(e)(2).
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
then
$entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Rule unit: CBI.1
// Fires for each CBI_address RedactionEntity satisfying the dictionaryEntry constraint when no
// "Vertebrate Study" FileAttribute has the value "yes" (compared in lower case).
// The entity is skipped (not applied) under CBI.1.0.
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
then
$entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
end
// Fires for each CBI_address RedactionEntity satisfying the dictionaryEntry constraint when a
// "Vertebrate Study" FileAttribute has the value "yes" (compared in lower case).
// Applies the entity under CBI.1.1 with legal basis Article 39(e)(2).
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
then
$entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Rule unit: CBI.2
rule "CBI.2.0: Don't redact genitive CBI_author"
    when
        // Applied CBI_author entities whose following text matches an apostrophe-like character plus "s".
        $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), isApplied())
    then
        // Create a FALSE_POSITIVE entity on the same boundary; byBoundary may return an empty Optional.
        entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
                .ifPresent(genitiveAuthor -> {
                    genitiveAuthor.skip("CBI.2.0", "Genitive Author found");
                    insert(genitiveAuthor);
                });
end
// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables"
    when
        $section: Section(!hasTables(),
                          hasEntitiesOfType("published_information"),
                          (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
    then
        // Skip every author/address of the section, referencing the section's published_information entities.
        // The reference lookup stays inside the lambda, so it is evaluated once per skipped entity.
        $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
                .forEach(nameOrAddress -> nameOrAddress.skipWithReferences("CBI.7.0",
                        "Published Information found in section",
                        $section.getEntitiesOfType("published_information")));
end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
    when
        $table: Table(hasEntitiesOfType("published_information"),
                      (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
    then
        // Skip each streamed author/address, referencing the published_information entities of its own row.
        $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address"))
                .forEach(nameOrAddress -> nameOrAddress.skipWithReferences("CBI.7.1",
                        "Published Information found in row",
                        $table.getEntitiesOfTypeInSameRow("published_information", nameOrAddress)));
end
// Rule unit: CBI.9
rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author(s)"))
    then
        // One CBI_author entity per cell streamed for the header; cells yielding an empty Optional are dropped.
        $table.streamTableCellsWithHeader("Author(s)")
                .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
                .flatMap(Optional::stream)
                .forEach(author -> {
                    author.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                    author.addEngine(Engine.RULE);
                    insert(author);
                });
end
rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author"))
    then
        // One CBI_author entity per cell streamed for the header; cells yielding an empty Optional are dropped.
        $table.streamTableCellsWithHeader("Author")
                .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
                .flatMap(Optional::stream)
                .forEach(author -> {
                    author.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                    author.addEngine(Engine.RULE);
                    insert(author);
                });
end
// Rule unit: CBI.10
rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author(s)"))
    then
        // One CBI_author entity per cell streamed for the header; cells yielding an empty Optional are dropped.
        $table.streamTableCellsWithHeader("Author(s)")
                .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
                .flatMap(Optional::stream)
                .forEach(author -> {
                    author.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                    author.addEngine(Engine.RULE);
                    insert(author);
                });
end
rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author"))
    then
        // One CBI_author entity per cell streamed for the header; cells yielding an empty Optional are dropped.
        $table.streamTableCellsWithHeader("Author")
                .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
                .flatMap(Optional::stream)
                .forEach(author -> {
                    author.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                    author.addEngine(Engine.RULE);
                    insert(author);
                });
end
// Rule unit: CBI.11
// Runs in agenda group LOCAL_DICTIONARY_ADDS with salience -1.
// For tables that have both an "Author(s)" and a "Vertebrate Study Y/N" header, every CBI_author entity of
// the table is passed to dictionary.addMultipleAuthorsAsRecommendation.
rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -1
when
$table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N"))
then
$table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity));
end
// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("et al."))
    then
        // Capture group 1 is the text in front of "et al"; it is passed as the group argument to byRegex.
        String etAlPattern = "\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?";
        entityCreationService.byRegex(etAlPattern, "CBI_author", EntityType.ENTITY, 1, $section)
                .forEach(author -> {
                    author.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                    author.addEngine(Engine.RULE);
                    // The found value is also added to the local CBI_author dictionary.
                    dictionary.addLocalDictionaryEntry("CBI_author", author.getValue(), false);
                    insert(author);
                });
end
rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("et al."))
    then
        // Capture group 1 is the text in front of "et al"; it is passed as the group argument to byRegex.
        String etAlPattern = "\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?";
        entityCreationService.byRegex(etAlPattern, "CBI_author", EntityType.ENTITY, 1, $section)
                .forEach(author -> {
                    author.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                    author.addEngine(Engine.RULE);
                    insert(author);
                    // The found value is also added to the local CBI_author dictionary.
                    dictionary.addLocalDictionaryEntry("CBI_author", author.getValue(), false);
                });
end
// Rule unit: CBI.17
rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
    when
        // Table-free sections containing "Species" and "Source", but neither "Species:" nor "Source:".
        $section: Section(!hasTables(), containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
    then
        entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
                .forEach(sourceAddress -> {
                    sourceAddress.addEngine(Engine.RULE);
                    sourceAddress.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section");
                    insert(sourceAddress);
                });
end
rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with colon"
    when
        // Table-free sections containing both "Species:" and "Source:".
        $section: Section(!hasTables(), containsString("Species:"), containsString("Source:"))
    then
        entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
                .forEach(sourceAddress -> {
                    sourceAddress.addEngine(Engine.RULE);
                    sourceAddress.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section");
                    insert(sourceAddress);
                });
end
// Rule unit: CBI.20
rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        // Compare the attribute value in lower case, as the other rule units do; with the former
        // case-sensitive "Yes" check a value such as "yes" was treated as non vertebrate only by this rule unit.
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:"))
    then
        // Text between the two markers becomes a CBI_address entity that is skipped for non vertebrate studies,
        // added to the local dictionary and inserted as a fact.
        entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
                .forEach(laboratoryEntity -> {
                    laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study");
                    laboratoryEntity.addEngine(Engine.RULE);
                    dictionary.addLocalDictionaryEntry(laboratoryEntity);
                    insert(laboratoryEntity);
                });
end
rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        // Compare the attribute value in lower case, as the other rule units do; with the former
        // case-sensitive "Yes" check a value such as "yes" never activated this vertebrate rule.
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:"))
    then
        // Text between the two markers becomes a CBI_address entity that is applied with legal basis
        // Article 39(e)(2), added to the local dictionary and inserted as a fact.
        entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
                .forEach(laboratoryEntity -> {
                    laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                    laboratoryEntity.addEngine(Engine.RULE);
                    dictionary.addLocalDictionaryEntry(laboratoryEntity);
                    insert(laboratoryEntity);
                });
end
//------------------------------------ PII rules ------------------------------------
// Rule unit: PII.0
// PII.0.0 -- fires when no "Vertebrate Study" file attribute with a (lower-cased) value
// of "yes" exists. Each RedactionEntity of type "PII" that satisfies the dictionaryEntry
// constraint is applied with legal basis Article 39(e)(3).
rule "PII.0.0: Redact all PII (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $personalInfo: RedactionEntity(type == "PII", dictionaryEntry)
    then
        $personalInfo.apply(
            "PII.0.0",
            "Personal Information found",
            "Article 39(e)(3) of Regulation (EC) No 178/2002"
        );
end
// PII.0.1 -- vertebrate counterpart of PII.0.0: requires a "Vertebrate Study" file
// attribute whose lower-cased value is "yes". Each RedactionEntity of type "PII" that
// satisfies the dictionaryEntry constraint is applied with legal basis Article 39(e)(2).
rule "PII.0.1: Redact all PII (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $personalInfo: RedactionEntity(type == "PII", dictionaryEntry)
    then
        $personalInfo.apply(
            "PII.0.1",
            "Personal Information found",
            "Article 39(e)(2) of Regulation (EC) No 178/2002"
        );
end
// Rule unit: PII.1
// PII.1.0 -- non-vertebrate studies only. Sections containing "@" are scanned with the
// e-mail regex; capture group 1 of every match becomes a PII ENTITY that is tagged with
// Engine.RULE, applied with Article 39(e)(3) and inserted.
rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $mailSection: Section(containsString("@"))
    then
        entityCreationService
            .byRegex(
                "\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b",
                "PII",
                EntityType.ENTITY,
                1,
                $mailSection)
            .forEach(mail -> {
                mail.addEngine(Engine.RULE);
                mail.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                insert(mail);
            });
end
// PII.1.1 -- vertebrate counterpart of PII.1.0. Sections containing "@" are scanned with
// the e-mail regex; capture group 1 of every match becomes a PII ENTITY that is tagged
// with Engine.RULE, applied and inserted.
// The legal basis is Article 39(e)(2), matching every other vertebrate-study rule in this
// file (PII.0.1, PII.2.1, PII.9.1, PII.9.3); the previous 39(e)(3) was the non-vertebrate
// basis, presumably carried over from PII.1.0.
rule "PII.1.1: Redact Emails by RegEx (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("@"))
    then
        entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
            .forEach(emailEntity -> {
                emailEntity.addEngine(Engine.RULE);
                emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                insert(emailEntity);
            });
end
// Rule unit: PII.2
// PII.2.0 -- non-vertebrate studies only. A section qualifies when it contains any of the
// listed phone/fax keywords. The case-insensitive regex is then run over the section and
// capture group 2 (the number part) of each match becomes a PII ENTITY, tagged with
// Engine.RULE, applied with Article 39(e)(3) and inserted.
rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $contactSection: Section(
            containsString("Contact")
            || containsString("Telephone")
            || containsString("Phone")
            || containsString("Ph.")
            || containsString("Fax")
            || containsString("Tel")
            || containsString("Ter")
            || containsString("Mobile")
            || containsString("Fel")
            || containsString("Fer")
        )
    then
        entityCreationService
            .byRegexIgnoreCase(
                "\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b",
                "PII",
                EntityType.ENTITY,
                2,
                $contactSection)
            .forEach(phoneOrFax -> {
                phoneOrFax.addEngine(Engine.RULE);
                phoneOrFax.apply("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                insert(phoneOrFax);
            });
end
// PII.2.1 -- vertebrate counterpart of PII.2.0. A section qualifies when it contains any
// of the listed phone/fax keywords. Capture group 2 of each case-insensitive regex match
// becomes a PII ENTITY, tagged with Engine.RULE, applied with Article 39(e)(2) and inserted.
rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $contactSection: Section(
            containsString("Contact")
            || containsString("Telephone")
            || containsString("Phone")
            || containsString("Ph.")
            || containsString("Fax")
            || containsString("Tel")
            || containsString("Ter")
            || containsString("Mobile")
            || containsString("Fel")
            || containsString("Fer")
        )
    then
        entityCreationService
            .byRegexIgnoreCase(
                "\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b",
                "PII",
                EntityType.ENTITY,
                2,
                $contactSection)
            .forEach(phoneOrFax -> {
                phoneOrFax.addEngine(Engine.RULE);
                phoneOrFax.apply("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                insert(phoneOrFax);
            });
end
// Rule unit: PII.9
// PII.9.0 -- non-vertebrate studies only. Applies to table-free sections that contain
// "AUTHOR(S):" and "COMPLETION DATE:" but not "STUDY COMPLETION DATE:". The text between
// "AUTHOR(S):" and "COMPLETION DATE:" becomes a PII ENTITY, applied with Article 39(e)(3).
rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $authorSection: Section(
            !hasTables(),
            containsString("AUTHOR(S):"),
            containsString("COMPLETION DATE:"),
            !containsString("STUDY COMPLETION DATE:")
        )
    then
        entityCreationService
            .betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $authorSection)
            .forEach(author -> {
                author.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                insert(author);
            });
end
// PII.9.1 -- vertebrate counterpart of PII.9.0. Applies to table-free sections that
// contain "AUTHOR(S):" and "COMPLETION DATE:" but not "STUDY COMPLETION DATE:". The text
// between "AUTHOR(S):" and "COMPLETION DATE:" becomes a PII ENTITY, applied with
// Article 39(e)(2).
// The descriptive part of the rule name previously read "STUDY COMPLETION DATE (non
// vertebrate study)", which contradicted both the end marker and the FileAttribute
// condition; only that description was corrected, the PII.9.1 identifier is unchanged.
rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:"))
    then
        entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
            .forEach(authorEntity -> {
                authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                authorEntity.addEngine(Engine.RULE);
                insert(authorEntity);
            });
end
// PII.9.2 -- non-vertebrate studies only. Applies to table-free sections that contain
// "AUTHOR(S):" and "STUDY COMPLETION DATE:". The text between those two markers becomes
// a PII ENTITY, applied with Article 39(e)(3).
// The descriptive part of the rule name previously read "COMPLETION DATE", which did not
// match the "STUDY COMPLETION DATE:" end marker used below; only that description was
// corrected, the PII.9.2 identifier is unchanged.
rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:"))
    then
        entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
            .forEach(authorEntity -> {
                authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                authorEntity.addEngine(Engine.RULE);
                insert(authorEntity);
            });
end
// PII.9.3 -- vertebrate counterpart of PII.9.2. Applies to table-free sections that
// contain "AUTHOR(S):" and "STUDY COMPLETION DATE:". The text between those two markers
// becomes a PII ENTITY, applied with Article 39(e)(2).
rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $authorSection: Section(
            !hasTables(),
            containsString("AUTHOR(S):"),
            containsString("STUDY COMPLETION DATE:")
        )
    then
        entityCreationService
            .betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $authorSection)
            .forEach(author -> {
                author.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                insert(author);
            });
end
//------------------------------------ Other rules ------------------------------------
// Rule unit: ETC.0
// ETC.0.0 -- sections mentioning "purity" (any case) are scanned with the purity regex.
// Capture group 1 of each match becomes a "hint_only" ENTITY that is tagged with
// Engine.RULE and skipped with an empty reason. Unlike most rules in this file, the
// consequence does not call insert(...) on the created entities.
rule "ETC.0.0: Purity Hint"
    when
        $puritySection: Section(containsStringIgnoreCase("purity"))
    then
        entityCreationService
            .byRegexIgnoreCase(
                "(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%",
                "hint_only",
                EntityType.ENTITY,
                1,
                $puritySection)
            .forEach(purityHint -> {
                purityHint.addEngine(Engine.RULE);
                purityHint.skip("ETC.0.0", "");
            });
end
// Rule unit: ETC.2
// ETC.2.0 / ETC.2.1 -- complementary pair that applies every Image of type SIGNATURE.
//   - ETC.2.0 (no vertebrate attribute): legal basis Article 39(e)(3).
//   - ETC.2.1 (vertebrate attribute present): legal basis Article 39(e)(2).
// The vertebrate rule previously reused the identifier "ETC.2.0" in both its name and its
// apply(...) call, so applications from the two rules could not be told apart by rule
// identifier; it now carries "ETC.2.1", following the x.y.0 / x.y.1 scheme used by the
// other non-vertebrate / vertebrate pairs in this file.
// The "Vertebrate Study" value is compared case-insensitively, like the PII rules, so the
// same file attribute cannot be classified differently by different rule units. Both
// rules must use the same comparison, otherwise a document could match both or neither.
rule "ETC.2.0: Redact signatures (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $signature: Image(imageType == ImageType.SIGNATURE)
    then
        $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "ETC.2.1: Redact signatures (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $signature: Image(imageType == ImageType.SIGNATURE)
    then
        $signature.apply("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Rule unit: ETC.3
// ETC.3.0 / ETC.3.1 -- complementary pair that applies every Image of type LOGO.
//   - ETC.3.0 (no vertebrate attribute): legal basis Article 39(e)(3).
//   - ETC.3.1 (vertebrate attribute present): legal basis Article 39(e)(2).
// The "(vertebrate study)" / "(non vertebrate study)" labels in the two rule names were
// swapped relative to their FileAttribute conditions; the descriptions now match the
// conditions, while the identifiers ETC.3.0 / ETC.3.1 and the applied legal bases are
// unchanged.
// The "Vertebrate Study" value is compared case-insensitively, like the PII rules, so the
// same file attribute cannot be classified differently by different rule units. Both
// rules must use the same comparison, otherwise a document could match both or neither.
rule "ETC.3.0: Redact logos (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $logo: Image(imageType == ImageType.LOGO)
    then
        $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "ETC.3.1: Redact logos (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $logo: Image(imageType == ImageType.LOGO)
    then
        $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Rule unit: ETC.5
// ETC.5.0 -- when no "Confidentiality" file attribute with value "confidential" exists,
// every "dossier_redaction" entity is flagged as ignored. The entity and each of its
// intersecting nodes are then pushed back to the engine via update(...).
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
    when
        not FileAttribute(label == "Confidentiality", value == "confidential")
        $ignoredRedaction: RedactionEntity(type == "dossier_redaction")
    then
        $ignoredRedaction.setIgnored(true);
        update($ignoredRedaction);
        $ignoredRedaction.getIntersectingNodes()
            .forEach(affectedNode -> update(affectedNode));
end
//------------------------------------ AI rules ------------------------------------
// Rule unit: AI.0
// AI.0.0 -- salience 999. When the NerEntities fact holds entities of type "CBI_author",
// each one is passed to entityCreationService.byNerEntity as a RECOMMENDATION on the
// document. byNerEntity returns an Optional; empty results are dropped and the remaining
// entities are inserted.
rule "AI.0.0: add all NER Entities of type CBI_author"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("CBI_author"))
    then
        nerEntities.streamEntitiesOfType("CBI_author")
            .map(authorNer -> entityCreationService.byNerEntity(authorNer, EntityType.RECOMMENDATION, document))
            // keep only the Optionals that actually carry an entity
            .flatMap(Optional::stream)
            .forEach(authorRecommendation -> insert(authorRecommendation));
end
// Rule unit: AI.1
// AI.1.0 -- salience 999. When the NerEntities fact holds entities of type "ORG", "STREET"
// or "CITY", the boundaries produced by combineNerEntitiesToCbiAddressDefaults are turned
// into CBI_address RECOMMENDATIONs on the document. byBoundary returns an Optional; empty
// results are dropped, the rest are tagged with Engine.NER and inserted.
rule "AI.1.0: combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(
            hasEntitiesOfType("ORG")
            || hasEntitiesOfType("STREET")
            || hasEntitiesOfType("CITY")
        )
    then
        nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
            .map(addressBoundary -> entityCreationService.byBoundary(addressBoundary, "CBI_address", EntityType.RECOMMENDATION, document))
            // keep only the Optionals that actually carry an entity
            .flatMap(Optional::stream)
            .forEach(addressRecommendation -> {
                addressRecommendation.addEngine(Engine.NER);
                insert(addressRecommendation);
            });
end
//------------------------------------ Manual redaction rules ------------------------------------
// Rule unit: MAN.0
// MAN.0.0 -- salience 128. Pairs each ManualResizeRedaction with the RedactionEntity whose
// annotation id matches, delegates the resize to manualRedactionApplicationService, then
// retracts the resize request and updates the entity and its intersecting nodes.
rule "MAN.0.0: Apply manual resize redaction"
    salience 128
    when
        $resizeRequest: ManualResizeRedaction($annotationId: annotationId)
        $resizeTarget: RedactionEntity(matchesAnnotationId($annotationId))
    then
        manualRedactionApplicationService.resizeEntityAndReinsert($resizeTarget, $resizeRequest);
        retract($resizeRequest);
        update($resizeTarget);
        $resizeTarget.getIntersectingNodes()
            .forEach(affectedNode -> update(affectedNode));
end
// Rule unit: MAN.1
// MAN.1.0 -- salience 128. For an APPROVED IdRemoval (not a dictionary removal, with a
// request date) that has no matching APPROVED ManualForceRedaction, the RedactionEntity
// with the same annotation id is flagged as ignored and updated, the IdRemoval is
// retracted, and the entity's intersecting nodes are updated.
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
    salience 128
    when
        $removal: IdRemoval(
            status == AnnotationStatus.APPROVED,
            !removeFromDictionary,
            requestDate != null,
            $annotationId: annotationId
        )
        not ManualForceRedaction(
            $annotationId == annotationId,
            status == AnnotationStatus.APPROVED,
            requestDate != null
        )
        $removedEntity: RedactionEntity(matchesAnnotationId($annotationId))
    then
        $removedEntity.setIgnored(true);
        update($removedEntity);
        retract($removal);
        $removedEntity.getIntersectingNodes()
            .forEach(affectedNode -> update(affectedNode));
end
// MAN.1.1 -- salience 128. Image counterpart of MAN.1.0: for an APPROVED IdRemoval (not a
// dictionary removal, with a request date) that has no matching APPROVED
// ManualForceRedaction, the Image with the same id is flagged as ignored and updated, the
// IdRemoval is retracted, and the image's parent is updated.
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
    salience 128
    when
        $removal: IdRemoval(
            status == AnnotationStatus.APPROVED,
            !removeFromDictionary,
            requestDate != null,
            $annotationId: annotationId
        )
        not ManualForceRedaction(
            $annotationId == annotationId,
            status == AnnotationStatus.APPROVED,
            requestDate != null
        )
        $removedImage: Image($annotationId == id)
    then
        $removedImage.setIgnored(true);
        update($removedImage);
        retract($removal);
        update($removedImage.getParent());
end
// Rule unit: MAN.2
// MAN.2.0 -- salience 128. For an APPROVED ManualForceRedaction with a request date, the
// RedactionEntity with the matching annotation id is applied with the request's legal
// basis, its removed/ignored flags are cleared and skipRemoveEntitiesContainedInLarger is
// set. The entity and its intersecting nodes are updated and the request is retracted.
rule "MAN.2.0: Apply force redaction"
    salience 128
    when
        $forceRequest: ManualForceRedaction(
            $annotationId: annotationId,
            status == AnnotationStatus.APPROVED,
            requestDate != null,
            $forcedLegalBasis: legalBasis
        )
        $forcedEntity: RedactionEntity(matchesAnnotationId($annotationId))
    then
        $forcedEntity.apply("MAN.2.0", "Forced redaction", $forcedLegalBasis);
        $forcedEntity.setRemoved(false);
        $forcedEntity.setIgnored(false);
        $forcedEntity.setSkipRemoveEntitiesContainedInLarger(true);
        update($forcedEntity);
        $forcedEntity.getIntersectingNodes()
            .forEach(affectedNode -> update(affectedNode));
        retract($forceRequest);
end
// Rule unit: MAN.3
// MAN.3.0 -- salience 128. For an APPROVED ManualImageRecategorization, the Image with the
// matching id receives the image type parsed from the request via ImageType.fromString.
// The image is updated, the request is retracted, and the image's parent is updated.
rule "MAN.3.0: Apply image recategorization"
    salience 128
    when
        $recatRequest: ManualImageRecategorization(
            $annotationId: annotationId,
            status == AnnotationStatus.APPROVED,
            $requestedType: type
        )
        $recatImage: Image($annotationId == id)
    then
        $recatImage.setImageType(ImageType.fromString($requestedType));
        update($recatImage);
        retract($recatRequest);
        update($recatImage.getParent());
end
//------------------------------------ Entity merging rules ------------------------------------
// Rule unit: X.0
// X.0.0 -- salience 65. When one RedactionEntity is contained by a different entity of the
// same type and entityType, and the contained one is neither resized nor flagged
// skipRemoveEntitiesContainedInLarger, the contained entity is removed and retracted.
rule "X.0.0: remove Entity contained by Entity of same type"
    salience 65
    when
        $outer: RedactionEntity($sharedType: type, $sharedEntityType: entityType)
        $inner: RedactionEntity(
            containedBy($outer),
            type == $sharedType,
            entityType == $sharedEntityType,
            this != $outer,
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
    then
        $inner.remove();
        retract($inner);
end
// Rule unit: X.1
// X.1.0 -- salience 64. Two distinct, intersecting RedactionEntities with the same type
// and entityType (neither resized nor flagged skipRemoveEntitiesContainedInLarger) are
// both removed; entityCreationService.byEntities builds one entity from the pair, the
// originals are retracted, the new entity is inserted and its intersecting nodes updated.
rule "X.1.0: merge intersecting Entities of same type"
    salience 64
    when
        $left: RedactionEntity(
            $sharedType: type,
            $sharedEntityType: entityType,
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
        $right: RedactionEntity(
            intersects($left),
            type == $sharedType,
            entityType == $sharedEntityType,
            this != $left,
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
    then
        $left.remove();
        $right.remove();
        RedactionEntity combined = entityCreationService.byEntities(List.of($left, $right), $sharedType, $sharedEntityType, document);
        retract($left);
        retract($right);
        insert(combined);
        combined.getIntersectingNodes()
            .forEach(affectedNode -> update(affectedNode));
end
// Rule unit: X.2
// X.2.0 -- salience 64. An ENTITY that is contained by a FALSE_POSITIVE of the same type
// (and is neither resized nor flagged skipRemoveEntitiesContainedInLarger) is removed and
// retracted. The intersecting nodes are updated before remove() is called; this statement
// order is kept exactly as it was.
// The final retract(...) statement now ends with a semicolon, consistent with every other
// consequence in this file.
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
    salience 64
    when
        $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
        $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
    then
        $entity.getIntersectingNodes().forEach(node -> update(node));
        $entity.remove();
        retract($entity);
end
// Rule unit: X.3
// X.3.0 -- salience 64. A RECOMMENDATION that is contained by a FALSE_RECOMMENDATION of
// the same type (and is neither resized nor flagged skipRemoveEntitiesContainedInLarger)
// is removed and retracted.
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
    salience 64
    when
        $rejecting: RedactionEntity(
            $sharedType: type,
            entityType == EntityType.FALSE_RECOMMENDATION
        )
        $rejected: RedactionEntity(
            containedBy($rejecting),
            type == $sharedType,
            entityType == EntityType.RECOMMENDATION,
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
    then
        $rejected.remove();
        retract($rejected);
end
// Rule unit: X.4
// X.4.0 -- salience 256. A RECOMMENDATION that intersects an ENTITY of the same type (and
// is neither resized nor flagged skipRemoveEntitiesContainedInLarger) hands its engines
// over to the ENTITY and is then removed and retracted.
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
    salience 256
    when
        $kept: RedactionEntity($sharedType: type, entityType == EntityType.ENTITY)
        $superseded: RedactionEntity(
            intersects($kept),
            type == $sharedType,
            entityType == EntityType.RECOMMENDATION,
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
    then
        $kept.addEngines($superseded.getEngines());
        $superseded.remove();
        retract($superseded);
end
// Rule unit: X.5
// X.5.0 -- salience 256. A RECOMMENDATION that is contained by any ENTITY, regardless of
// type (and is neither resized nor flagged skipRemoveEntitiesContainedInLarger), is
// removed and retracted.
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
    salience 256
    when
        $covering: RedactionEntity(entityType == EntityType.ENTITY)
        $covered: RedactionEntity(
            containedBy($covering),
            entityType == EntityType.RECOMMENDATION,
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
    then
        $covered.remove();
        retract($covered);
end
// Rule unit: X.6
// X.6.0 -- salience 32. An entity of a different type that intersects an ENTITY and whose
// dictionary rank is lower than the ENTITY's rank (and is neither resized nor flagged
// skipRemoveEntitiesContainedInLarger) is removed and retracted. Its intersecting nodes
// are updated before remove() is called.
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
    salience 32
    when
        $winner: RedactionEntity($winnerType: type, entityType == EntityType.ENTITY)
        $loser: RedactionEntity(
            intersects($winner),
            type != $winnerType,
            dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($winnerType),
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
    then
        $loser.getIntersectingNodes()
            .forEach(affectedNode -> update(affectedNode));
        $loser.remove();
        retract($loser);
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1
// FA.1.0 -- salience 64. When two distinct FileAttribute facts share the same label and
// value, the second one matched is retracted.
rule "FA.1.0: remove duplicate FileAttributes"
    salience 64
    when
        $original: FileAttribute($sharedLabel: label, $sharedValue: value)
        $copy: FileAttribute(
            this != $original,
            label == $sharedLabel,
            value == $sharedValue
        )
    then
        retract($copy);
end
//------------------------------------ Local dictionary search rules ------------------------------------
// Rule unit: LDS.0
// LDS.0.0 -- agenda-group LOCAL_DICTIONARY_ADDS, salience -999. For every DictionaryModel
// from dictionary.getDictionaryModels() whose localEntries are not empty, the model's
// localSearch is run over the document; each result becomes a RECOMMENDATION of the
// model's type, tagged with Engine.RULE and inserted.
rule "LDS.0.0: run local dictionary search"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    salience -999
    when
        DictionaryModel(
            !localEntries.isEmpty(),
            $dictionaryType: type,
            $localSearch: localSearch
        ) from dictionary.getDictionaryModels()
    then
        entityCreationService
            .bySearchImplementation($localSearch, $dictionaryType, EntityType.RECOMMENDATION, document)
            .forEach(localMatch -> {
                localMatch.addEngine(Engine.RULE);
                insert(localMatch);
            });
end

View File

@ -132,9 +132,11 @@ rule "CBI.2.0: Don't redact genitive CBI_author"
when
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), isApplied())
then
RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document);
falsePositive.skip("CBI.2.0", "Genitive Author found");
insert(falsePositive);
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
.ifPresent(falsePositive -> {
falsePositive.skip("CBI.2.0", "Genitive Author found");
insert(falsePositive);
});
end
@ -246,7 +248,6 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.addEngine(Engine.RULE);
entity.applyWithReferences(
"CBI.5.0",
"no_redaction_indicator but also redaction_indicator found",
@ -264,10 +265,9 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
hasEntitiesOfType("redaction_indicator"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator"))
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.addEngine(Engine.RULE);
entity.applyWithReferences(
"CBI.5.1",
"no_redaction_indicator but also redaction_indicator found",
@ -290,7 +290,6 @@ rule "CBI.6.0: Don't redact Names and Addresses if vertebrate but also published
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.addEngine(Engine.RULE);
entity.skipWithReferences(
"CBI.6.0",
"vertebrate but also published_information found",
@ -307,10 +306,9 @@ rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator"))
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "published_information"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.addEngine(Engine.RULE);
entity.skipWithReferences(
"CBI.6.1",
"vertebrate but also published_information found",
@ -326,8 +324,8 @@ rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published
rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
@ -342,7 +340,7 @@ rule "CBI.7.0: Do not redact Names and Addresses if published information found
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
when
$table: Table(hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
@ -522,7 +520,7 @@ rule "CBI.13.0: Ignore CBI Address Recommendations"
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION)
then
$entity.removeFromGraph();
$entity.remove();
retract($entity)
end
@ -655,11 +653,13 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
)
then
RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.removeFromGraph();
retract($entityToExpand);
insert(expandedEntity);
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
.ifPresent(expandedEntity -> {
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove();
retract($entityToExpand);
insert(expandedEntity);
});
end
@ -668,11 +668,13 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
when
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.removeFromGraph();
retract($entityToExpand);
insert(expandedEntity);
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> {
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove();
retract($entityToExpand);
insert(expandedEntity);
});
end
@ -1143,10 +1145,12 @@ rule "PII.12.0: Expand PII entities with salutation prefix"
when
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
expandedEntity.addEngine(Engine.RULE);
insert(expandedEntity);
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> {
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
expandedEntity.addEngine(Engine.RULE);
insert(expandedEntity);
});
end
@ -1229,8 +1233,9 @@ rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confi
not FileAttribute(label == "Confidentiality", value == "confidential")
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
then
$dossierRedaction.removeFromGraph();
retract($dossierRedaction);
$dossierRedaction.setIgnored(true);
update($dossierRedaction);
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node));
end
@ -1290,6 +1295,8 @@ rule "AI.0.0: add all NER Entities of type CBI_author"
then
nerEntities.streamEntitiesOfType("CBI_author")
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(entity -> insert(entity));
end
@ -1302,6 +1309,8 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
then
nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(entity -> {
entity.addEngine(Engine.NER);
insert(entity);
@ -1318,6 +1327,8 @@ rule "AI.2.0: add all NER Entities of any type except CBI_author"
nerEntities.getNerEntityList().stream()
.filter(nerEntity -> !nerEntity.type().equals("CBI_author"))
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, nerEntity.type().toLowerCase(), EntityType.RECOMMENDATION, document))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(entity -> insert(entity));
end
@ -1334,6 +1345,7 @@ rule "MAN.0.0: Apply manual resize redaction"
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
end
@ -1341,21 +1353,27 @@ rule "MAN.0.0: Apply manual resize redaction"
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.setIgnored(true);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
end
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$imageEntityToBeRemoved: Image($id == id)
then
$imageEntityToBeRemoved.setIgnored(true);
update($imageEntityToBeRemoved);
retract($idRemoval);
update($imageEntityToBeRemoved.getParent());
end
@ -1363,11 +1381,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to
rule "MAN.2.0: Apply force redaction"
salience 128
when
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
then
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
retract($force);
end
@ -1375,10 +1398,13 @@ rule "MAN.2.0: Apply force redaction"
rule "MAN.3.0: Apply image recategorization"
salience 128
when
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$image: Image($id == id)
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$imageToBeRecategorized: Image($id == id)
then
$image.setImageType(ImageType.fromString($imageType));
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
update($imageToBeRecategorized);
update($imageToBeRecategorized.getParent());
retract($recategorization);
end
@ -1391,7 +1417,7 @@ rule "X.0.0: remove Entity contained by Entity of same type"
$larger: RedactionEntity($type: type, $entityType: entityType)
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
then
$contained.removeFromGraph();
$contained.remove();
retract($contained);
end
@ -1403,12 +1429,13 @@ rule "X.1.0: merge intersecting Entities of same type"
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
then
$first.removeFromGraph();
$second.removeFromGraph();
$first.remove();
$second.remove();
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
retract($first);
retract($second);
insert(mergedEntity);
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
end
@ -1419,7 +1446,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
then
$entity.removeFromGraph();
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove();
retract($entity)
end
@ -1431,7 +1459,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
$recommendation.remove();
retract($recommendation);
end
@ -1444,7 +1472,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$entity.addEngines($recommendation.getEngines());
$recommendation.removeFromGraph();
$recommendation.remove();
retract($recommendation);
end
@ -1456,7 +1484,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
$entity: RedactionEntity(entityType == EntityType.ENTITY)
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
$recommendation.remove();
retract($recommendation);
end
@ -1468,7 +1496,8 @@ rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENT
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY)
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger)
then
$lowerRank.removeFromGraph();
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove();
retract($lowerRank);
end

View File

@ -14,6 +14,11 @@ import java.util.Optional;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SectionIdentifier;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
@ -308,7 +313,18 @@ rule "DOC.5.0: Strain"
entity.apply("DOC.5.0", "Strain found.", "n-a");
});
end
rule "DOC.6.0"
when
Headline(containsStringIgnoreCase("materials and methods"), $sectionIdentifierMaterials: getSectionIdentifier())
Headline(containsStringIgnoreCase("controls"), getSectionIdentifier().isChildOf($sectionIdentifierMaterials), $sectionIdentifierControls: getSectionIdentifier())
$headline: Headline(containsStringIgnoreCase("positive control substances"), getSectionIdentifier().isChildOf($sectionIdentifierControls))
then
System.out.println($headline);
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY)
.forEach(entity -> {
entity.apply("DOC.6.0", "positive control substance found", "n-a");
});
end
//rule "DOC.7.0: study title by document structure"
// when
@ -328,7 +344,7 @@ rule "DOC.7.0: study title"
when
$section: Section(isOnPage(1) && (containsString("Final Report") || containsString("SPL")))
then
entityCreationService.byRegexWithLinebreaks("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> {
entityCreationService.byRegexWithLineBreaks("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> {
entity.apply("DOC.7.0", "Title found", "n-a");
});
entityCreationService.betweenStrings("TITLE", "DATA REQUIREMENT", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> {
@ -358,6 +374,8 @@ rule "DOC.8.1: Performing Laboratory (Name)"
nerEntities.streamEntitiesOfType("COUNTRY")
.filter(nerEntity -> $section.getBoundary().contains(nerEntity.boundary()))
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(entity -> {
entity.apply("DOC.8.2", "Performing Laboratory found", "n-a");
insert(entity);
@ -572,8 +590,8 @@ rule "DOC.13.0: Clinical Signs"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_signs", EntityType.ENTITY, $section);
entity.apply("DOC.13.0", "Clinical Signs found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a"));
end
@ -591,7 +609,7 @@ rule "DOC.14.0: Dosages"
entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
entity.apply("DOC.14.0", "Dosage found", "n-a");
});
entityCreationService.byRegexWithLinebreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> {
entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> {
entity.apply("DOC.14.0", "Dosage found", "n-a");
});
end
@ -602,8 +620,8 @@ rule "DOC.15.0: Mortality"
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
FileAttribute(label == "OECD Number", value == "425")
then
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent());
entity.apply("DOC.15.0", "Mortality found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a"));
end
@ -615,8 +633,8 @@ rule "DOC.17.0: Study Conclusion"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section);
entity.apply("DOC.17.0", "Study Conclusion found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a"));
end
@ -634,8 +652,8 @@ rule "DOC.18.0: Weight Behavior Changes"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section);
entity.apply("DOC.18.0", "Weight behavior changes found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a"));
end
@ -653,8 +671,8 @@ rule "DOC.19.0: Necropsy findings"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "necropsy_findings", EntityType.ENTITY, $section);
entity.apply("DOC.19.0", "Necropsy section found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY)
.forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a"));
end
@ -673,8 +691,8 @@ rule "DOC.22.0: Clinical observations"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_observations", EntityType.ENTITY, $section);
entity.apply("DOC.22.0", "Clinical observations section found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a"));
end
@ -730,8 +748,8 @@ rule "DOC.23.0: Bodyweight changes"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "bodyweight_changes", EntityType.ENTITY, $section);
entity.apply("DOC.23.0", "Bodyweight section found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a"));
end
@ -743,8 +761,8 @@ rule "DOC.24.0: Study Design"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_design", EntityType.ENTITY, $section);
entity.apply("DOC.24.0", "Study design section found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a"));
end
@ -765,8 +783,8 @@ rule "DOC.25.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "results_and_conclusion", EntityType.ENTITY, $section);
entity.apply("DOC.25.0", "Results and Conclusion found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_and_conclusion", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a"));
end
@ -800,8 +818,8 @@ rule "DOC.32.0: Preliminary Test Results (429)"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "preliminary_test_results", EntityType.ENTITY, $section);
entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a"));
end
@ -810,8 +828,8 @@ rule "DOC.33.0: Test Results (429)"
FileAttribute(label == "OECD Number", value == "429")
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")) && hasParagraphs())
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "test_results", EntityType.ENTITY, $section);
entity.apply("DOC.33.0", "Test Results found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a"));
end
@ -946,8 +964,8 @@ rule "DOC.39.0: Dilution of the test substance"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "dilution", EntityType.ENTITY, $section);
entity.apply("DOC.39.0", "Dilution found.", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", "n-a"));
end
@ -960,8 +978,8 @@ rule "DOC.40.0: Positive Control"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "positive_control", EntityType.ENTITY, $section);
entity.apply("DOC.40.0", "Positive control found.", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a"));
end
@ -970,8 +988,8 @@ rule "DOC.42.0: Mortality Statement"
FileAttribute(label == "OECD Number", value == "402")
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
then
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent());
entity.apply("DOC.42.0", "Mortality Statement found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a"));
end
@ -1043,8 +1061,8 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "doses_(mg_kg_bw)", EntityType.ENTITY, $section);
entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a");
entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY)
.forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a"));
end
@ -1090,11 +1108,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to
// MAN.2.0: for an approved ManualForceRedaction that has a request date, applies "Forced redaction"
// with the request's legal basis to the RedactionEntity matching its annotation id, resets the
// removed/ignored flags, sets skipRemoveEntitiesContainedInLarger, and then updates the entity and
// each of its intersecting nodes before retracting the force-redaction fact.
rule "MAN.2.0: Apply force redaction"
salience 128
when
// NOTE(review): the ManualForceRedaction pattern appears twice below, once unbound and once bound
// to $force. retract($force) in the consequence needs the bound form - presumably the unbound line
// is the superseded variant; confirm before using this span as-is.
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
then
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
retract($force);
end

View File

@ -184,7 +184,6 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.addEngine(Engine.RULE);
entity.applyWithReferences(
"CBI.5.0",
"no_redaction_indicator but also redaction_indicator found",
@ -202,10 +201,9 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
hasEntitiesOfType("redaction_indicator"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator"))
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.addEngine(Engine.RULE);
entity.applyWithReferences(
"CBI.5.1",
"no_redaction_indicator but also redaction_indicator found",
@ -471,11 +469,13 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
)
then
RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.removeFromGraph();
retract($entityToExpand);
insert(expandedEntity);
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
.ifPresent(expandedEntity -> {
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove();
retract($entityToExpand);
insert(expandedEntity);
});
end
@ -484,11 +484,13 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
when
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.removeFromGraph();
retract($entityToExpand);
insert(expandedEntity);
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> {
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.remove();
retract($entityToExpand);
insert(expandedEntity);
});
end
@ -836,10 +838,12 @@ rule "PII.12.0: Expand PII entities with salutation prefix"
when
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
expandedEntity.addEngine(Engine.RULE);
insert(expandedEntity);
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
.ifPresent(expandedEntity -> {
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
expandedEntity.addEngine(Engine.RULE);
insert(expandedEntity);
});
end
@ -909,8 +913,9 @@ rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confi
not FileAttribute(label == "Confidentiality", value == "confidential")
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
then
$dossierRedaction.removeFromGraph();
retract($dossierRedaction);
$dossierRedaction.setIgnored(true);
update($dossierRedaction);
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node));
end
@ -970,6 +975,8 @@ rule "AI.0.0: add all NER Entities of type CBI_author"
then
nerEntities.streamEntitiesOfType("CBI_author")
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(entity -> insert(entity));
end
@ -982,6 +989,8 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
then
nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(entity -> {
entity.addEngine(Engine.NER);
insert(entity);
@ -1001,6 +1010,7 @@ rule "MAN.0.0: Apply manual resize redaction"
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
end
@ -1008,21 +1018,27 @@ rule "MAN.0.0: Apply manual resize redaction"
// MAN.1.0: for an approved IdRemoval with a request date and removeFromDictionary unset, and with
// no approved ManualForceRedaction for the same annotation id, marks the matching RedactionEntity
// as ignored, updates it, retracts the IdRemoval fact and updates the entity's intersecting nodes.
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
// NOTE(review): the IdRemoval pattern appears twice below, once unbound and once bound to
// $idRemoval. retract($idRemoval) in the consequence needs the bound form - presumably the unbound
// line is the superseded variant; confirm.
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.setIgnored(true);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
end
// MAN.1.1: image counterpart of the id-removal handling. For an approved IdRemoval with a request
// date and removeFromDictionary unset, and with no approved ManualForceRedaction for the same
// annotation id, marks the Image whose id equals the annotation id as ignored, updates it,
// retracts the IdRemoval fact and updates the image's parent node.
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
// NOTE(review): the IdRemoval pattern appears twice below, once unbound and once bound to
// $idRemoval. retract($idRemoval) in the consequence needs the bound form - presumably the unbound
// line is the superseded variant; confirm.
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$imageEntityToBeRemoved: Image($id == id)
then
$imageEntityToBeRemoved.setIgnored(true);
update($imageEntityToBeRemoved);
retract($idRemoval);
update($imageEntityToBeRemoved.getParent());
end
@ -1030,11 +1046,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to
// MAN.2.0: for an approved ManualForceRedaction that has a request date, applies "Forced redaction"
// with the request's legal basis to the RedactionEntity matching its annotation id, resets the
// removed/ignored flags, sets skipRemoveEntitiesContainedInLarger, and then updates the entity and
// each of its intersecting nodes before retracting the force-redaction fact.
rule "MAN.2.0: Apply force redaction"
salience 128
when
// NOTE(review): the ManualForceRedaction pattern appears twice below, once unbound and once bound
// to $force. retract($force) in the consequence needs the bound form - presumably the unbound line
// is the superseded variant; confirm before using this span as-is.
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
then
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setRemoved(false);
$entityToForce.setIgnored(false);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
retract($force);
end
@ -1042,10 +1063,13 @@ rule "MAN.2.0: Apply force redaction"
// MAN.3.0: for an approved ManualImageRecategorization, sets the type of the Image whose id equals
// the annotation id to ImageType.fromString(<requested type>), updates the image, retracts the
// recategorization fact and updates the image's parent node.
rule "MAN.3.0: Apply image recategorization"
salience 128
when
// NOTE(review): two variants of the pattern pair are present: an unbound recategorization with
// $image, and a bound $recategorization with $imageToBeRecategorized. The update/retract calls in
// the consequence only reference the second pair - presumably the first pair (and the
// $image.setImageType call) is the superseded variant; confirm.
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$image: Image($id == id)
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$imageToBeRecategorized: Image($id == id)
then
$image.setImageType(ImageType.fromString($imageType));
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
update($imageToBeRecategorized);
retract($recategorization);
update($imageToBeRecategorized.getParent());
end
@ -1058,7 +1082,7 @@ rule "X.0.0: remove Entity contained by Entity of same type"
$larger: RedactionEntity($type: type, $entityType: entityType)
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
then
$contained.removeFromGraph();
$contained.remove();
retract($contained);
end
@ -1070,12 +1094,13 @@ rule "X.1.0: merge intersecting Entities of same type"
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
then
$first.removeFromGraph();
$second.removeFromGraph();
$first.remove();
$second.remove();
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
retract($first);
retract($second);
insert(mergedEntity);
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
end
@ -1086,7 +1111,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
then
$entity.removeFromGraph();
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove();
retract($entity)
end
@ -1098,7 +1124,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
$recommendation.remove();
retract($recommendation);
end
@ -1111,7 +1137,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$entity.addEngines($recommendation.getEngines());
$recommendation.removeFromGraph();
$recommendation.remove();
retract($recommendation);
end
@ -1123,7 +1149,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
$entity: RedactionEntity(entityType == EntityType.ENTITY)
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
$recommendation.remove();
retract($recommendation);
end
@ -1135,7 +1161,8 @@ rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENT
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY)
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger)
then
$lowerRank.removeFromGraph();
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove();
retract($lowerRank);
end

View File

@ -56,9 +56,11 @@ rule "add NER Entities of type CBI_author or CBI_address"
when
$nerEntity: EntityRecognitionEntity($type: type, (type == "CBI_author" || type == "CBI_address"))
then
RedactionEntity redactionEntity = entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document);
redactionEntity.addEngine(Engine.NER);
insert(redactionEntity);
entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document)
.ifPresent(redactionEntity -> {
redactionEntity.addEngine(Engine.NER);
insert(redactionEntity);
});
end
// --------------------------------------- CBI rules -------------------------------------------------------------------
@ -81,91 +83,126 @@ rule "Always redact PII"
$cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// --------------------------------------- merging rules -------------------------------------------------------------------
//------------------------------------ Entity merging rules ------------------------------------
// X.0.0: when a RedactionEntity is containedBy a different entity with the same type and
// entityType, and is neither resized nor flagged skipRemoveEntitiesContainedInLarger, the contained
// entity is removed and retracted.
// NOTE(review): this span carries two rule headers (an unnumbered one and the "X.0.0: ..." one) and
// both a removeFromGraph() and a remove() call on $contained - presumably the earlier and the
// revised form of the same rule; confirm which lines are meant to stay.
rule "remove Entity contained by Entity of same type"
// Rule unit: X.0
rule "X.0.0: remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType)
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
then
$contained.removeFromGraph();
$contained.remove();
retract($contained);
end
// X.1.0: when two distinct RedactionEntities of the same type and entityType intersect, and neither
// is resized or flagged skipRemoveEntitiesContainedInLarger, both are removed, a merged entity is
// created from them via entityCreationService.byEntities over the document, both originals are
// retracted, the merged entity is inserted and each of its intersecting nodes is updated.
// NOTE(review): this span carries two rule headers (an unnumbered one and the "X.1.0: ..." one) and
// both removeFromGraph() and remove() calls on $first/$second - presumably the earlier and the
// revised form of the same rule; confirm which lines are meant to stay.
rule "merge intersecting Entities of same type"
// Rule unit: X.1
rule "X.1.0: merge intersecting Entities of same type"
salience 64
when
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
then
$first.removeFromGraph();
$second.removeFromGraph();
$first.remove();
$second.remove();
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
retract($first);
retract($second);
insert(mergedEntity);
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
end
// X.2.0: when a RedactionEntity with entityType ENTITY is containedBy a FALSE_POSITIVE entity of the
// same type, and is neither resized nor flagged skipRemoveEntitiesContainedInLarger, its
// intersecting nodes are updated and the entity is removed and retracted.
// NOTE(review): this span carries two rule headers (an unnumbered one and the "X.2.0: ..." one) and
// both a removeFromGraph() and a remove() call on $entity - presumably the earlier and the revised
// form of the same rule; confirm which lines are meant to stay.
rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE"
// Rule unit: X.2
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
salience 64
when
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
then
$entity.removeFromGraph();
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove();
// NOTE(review): unlike the sibling rules, this retract call has no trailing semicolon - confirm
// the rule compiler in use accepts it.
retract($entity)
end
// X.3.0: when a RedactionEntity with entityType RECOMMENDATION is containedBy a
// FALSE_RECOMMENDATION entity of the same type, and is neither resized nor flagged
// skipRemoveEntitiesContainedInLarger, the recommendation is removed and retracted.
// NOTE(review): this span carries two rule headers (an unnumbered one and the "X.3.0: ..." one) and
// both a removeFromGraph() and a remove() call on $recommendation - presumably the earlier and the
// revised form of the same rule; confirm which lines are meant to stay.
rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
// Rule unit: X.3
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
salience 64
when
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
$recommendation.remove();
retract($recommendation);
end
// X.4.0: when a RECOMMENDATION and an ENTITY of the same type overlap, the recommendation's engines
// are added to the entity and the recommendation is removed and retracted.
// NOTE(review): this span carries two variants of almost every part of the rule: two headers
// ("... when contained by ENTITY" and "X.4.0: ... when intersected by ENTITY with same type"), two
// salience values (64 and 256), two $recommendation patterns (containedBy vs. intersects) and both
// removeFromGraph() and remove(). Presumably the first of each pair is the superseded form; confirm
// which lines are meant to stay.
rule "remove Entity of type RECOMMENDATION when contained by ENTITY"
salience 64
// Rule unit: X.4
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
salience 256
when
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY)
$recommendation: RedactionEntity(containedBy($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
$entity.addEngines($recommendation.getEngines());
$recommendation.remove();
retract($recommendation);
end
rule "remove Entity of lower rank, when equal boundaries and entityType"
// Rule unit: X.5
// A RECOMMENDATION that is containedBy any ENTITY (no type constraint between the two) has
// remove() called on it and is retracted from working memory. Recommendations that are resized or
// flagged skipRemoveEntitiesContainedInLarger are not matched.
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
    salience 256
    when
        $enclosingEntity: RedactionEntity(entityType == EntityType.ENTITY)
        $containedRecommendation: RedactionEntity(
            containedBy($enclosingEntity),
            entityType == EntityType.RECOMMENDATION,
            !resized,
            !skipRemoveEntitiesContainedInLarger
        )
    then
        $containedRecommendation.remove();
        retract($containedRecommendation);
end
// Rule unit: X.6
// X.6.0: removes and retracts the entity whose type has the lower dictionary rank
// (dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type)) when it overlaps an
// entity of a different type.
// NOTE(review): two variants of the pattern pair are present: one comparing equal boundaries and
// equal entityType with !applied, and one requiring the higher-ranked entity to be of entityType
// ENTITY and the lower-ranked one to intersect it (with !resized and
// !skipRemoveEntitiesContainedInLarger). Likewise both removeFromGraph() and remove() are called.
// Presumably the first of each pair is the superseded form; confirm which lines are meant to stay.
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
salience 32
when
$higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary)
$lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !applied)
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY)
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger)
then
$lowerRank.removeFromGraph();
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove();
retract($lowerRank);
end
// --------------------------------------- FileAttribute Rules -------------------------------------------------------------------
// FA.1.0: when two distinct FileAttribute facts share the same label and value, one of them is
// retracted.
// NOTE(review): this span carries two rule headers (an unnumbered one and the "FA.1.0: ..." one),
// two pattern pairs ($first/$second and $fileAttribute/$duplicate) and two retract calls -
// presumably the earlier and the revised naming of the same rule; confirm which lines are meant
// to stay.
rule "remove duplicate FileAttributes"
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1
rule "FA.1.0: remove duplicate FileAttributes"
salience 64
when
$first: FileAttribute($label: label, $value: value)
$second: FileAttribute(this != $first, label == $label, value == $value)
$fileAttribute: FileAttribute($label: label, $value: value)
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
then
retract($second);
retract($duplicate);
end
// --------------------------------------- local dictionary search -------------------------------------------------------------------
// LDS.0.0: in agenda group "LOCAL_DICTIONARY_ADDS" with salience -999, for every DictionaryModel
// from dictionary.getDictionaryModels() whose localEntries are not empty, creates RECOMMENDATION
// entities of the model's type over the document using the model's local search implementation
// and inserts them.
// NOTE(review): this span carries two rule headers (an unnumbered one and the "LDS.0.0: ..." one)
// and two .forEach continuations: one that inserts each entity directly and ends the statement,
// and one that first adds Engine.RULE and then inserts. Presumably the first is the superseded
// form; confirm which lines are meant to stay.
rule "run local dictionary search"
//------------------------------------ Local dictionary search rules ------------------------------------
// Rule unit: LDS.0
rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
.forEach(redactionEntity -> insert(redactionEntity));
.forEach(entity -> {
entity.addEngine(Engine.RULE);
insert(entity);
});
end