Merge branch 'RED-6929' into 'master'
RED-6929: fix acceptance tests/rules Closes RED-6929 See merge request redactmanager/redaction-service!32
This commit is contained in:
commit
ee65044578
@ -9,6 +9,7 @@ import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@ -72,6 +73,8 @@ public class RedactionLogEntryAdapter {
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
|
||||
.stream()
|
||||
.map(boundary -> entityCreationService.byBoundary(boundary, "temp", EntityType.ENTITY, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
|
||||
}
|
||||
|
||||
@ -100,8 +103,7 @@ public class RedactionLogEntryAdapter {
|
||||
RedactionEntity correctEntity = entityCreationService.byBoundary(closestEntity.getBoundary(),
|
||||
redactionLogEntry.getType(),
|
||||
redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY,
|
||||
node);
|
||||
|
||||
node).orElseThrow();
|
||||
String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";
|
||||
if (redactionLogEntry.isRedacted()) {
|
||||
correctEntity.apply(ruleIdentifier, redactionLogEntry.getReason(), redactionLogEntry.getLegalBasis());
|
||||
|
||||
@ -51,6 +51,8 @@ import org.apache.pdfbox.text.TextPosition;
|
||||
import org.apache.pdfbox.text.TextPositionComparator;
|
||||
import org.apache.pdfbox.util.QuickSort;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.utils.SuppressFBWarnings;
|
||||
|
||||
/**
|
||||
* This is just a copy, except that I only adjusted lines 594-607 because there is a bug in PDFBox.
|
||||
* see S416.pdf
|
||||
@ -1737,6 +1739,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine {
|
||||
}
|
||||
|
||||
|
||||
@SuppressFBWarnings
|
||||
private static Map<Character, Character> MIRRORING_CHAR_MAP = new HashMap<>();
|
||||
|
||||
static {
|
||||
|
||||
@ -83,7 +83,7 @@ public class DocumentGraphFactory {
|
||||
|
||||
List<TextPageBlock> textBlocks = new ArrayList<>(textBlocksToMerge);
|
||||
textBlocks.add(originalTextBlock);
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page);
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.fromContext(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page);
|
||||
List<Integer> treeId = context.documentTree.createNewChildEntryAndReturnId(parentNode, node);
|
||||
node.setLeafTextBlock(textBlock);
|
||||
node.setTreeId(treeId);
|
||||
@ -145,10 +145,7 @@ public class DocumentGraphFactory {
|
||||
|
||||
Page page = context.getPage(textBlocks.get(0).getPage());
|
||||
Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks),
|
||||
footer,
|
||||
context,
|
||||
page);
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.fromContext(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), footer, context, page);
|
||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
|
||||
footer.setTreeId(tocId);
|
||||
footer.setLeafTextBlock(textBlock);
|
||||
@ -160,7 +157,7 @@ public class DocumentGraphFactory {
|
||||
|
||||
Page page = context.getPage(textBlocks.get(0).getPage());
|
||||
Header header = Header.builder().documentTree(context.getDocumentTree()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page);
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.fromNumberOnPage(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page);
|
||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
|
||||
header.setTreeId(tocId);
|
||||
header.setLeafTextBlock(textBlock);
|
||||
|
||||
@ -80,7 +80,7 @@ public class SectionNodeFactory {
|
||||
remainingBlocks.removeAll(alreadyMerged);
|
||||
|
||||
if (abstractPageBlock instanceof TextPageBlock) {
|
||||
List<TextPageBlock> textBlocks = findTextBlocksWithSameClassificationAndAlignsY(abstractPageBlock, remainingBlocks);
|
||||
List<TextPageBlock> textBlocks = findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(abstractPageBlock, remainingBlocks);
|
||||
alreadyMerged.addAll(textBlocks);
|
||||
DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, textBlocks);
|
||||
} else if (abstractPageBlock instanceof TablePageBlock tablePageBlock) {
|
||||
@ -162,14 +162,15 @@ public class SectionNodeFactory {
|
||||
}
|
||||
|
||||
|
||||
private List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsY(AbstractPageBlock atc, List<AbstractPageBlock> pageBlocks) {
|
||||
private List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(AbstractPageBlock atc, List<AbstractPageBlock> pageBlocks) {
|
||||
|
||||
return pageBlocks.stream()
|
||||
.filter(abstractTextContainer -> !abstractTextContainer.equals(atc))
|
||||
.filter(abstractTextContainer -> abstractTextContainer.getPage() == atc.getPage())
|
||||
.filter(abstractTextContainer -> abstractTextContainer instanceof TextPageBlock)
|
||||
.filter(abstractTextContainer -> abstractTextContainer.intersectsY(atc))
|
||||
.map(abstractTextContainer -> (TextPageBlock) abstractTextContainer)
|
||||
.filter(abstractPageBlock -> !abstractPageBlock.equals(atc))
|
||||
.filter(abstractPageBlock -> abstractPageBlock.getPage() == atc.getPage())
|
||||
.filter(abstractPageBlock -> abstractPageBlock.getOrientation().equals(atc.getOrientation()))
|
||||
.filter(abstractPageBlock -> abstractPageBlock.intersectsY(atc))
|
||||
.filter(abstractPageBlock -> abstractPageBlock instanceof TextPageBlock)
|
||||
.map(abstractPageBlock -> (TextPageBlock) abstractPageBlock)
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -33,7 +33,11 @@ public class TableNodeFactory {
|
||||
Set<Page> pages = tablesToMerge.stream().map(AbstractPageBlock::getPage).map(context::getPage).collect(Collectors.toSet());
|
||||
List<List<Cell>> mergedRows = tablesToMerge.stream().map(TablePageBlock::getRows).flatMap(Collection::stream).toList();
|
||||
|
||||
Table table = Table.builder().documentTree(context.getDocumentTree()).numberOfCols(mergedRows.isEmpty() ? 0 :mergedRows.get(0).size()).numberOfRows(mergedRows.size()).build();
|
||||
Table table = Table.builder()
|
||||
.documentTree(context.getDocumentTree())
|
||||
.numberOfCols(mergedRows.isEmpty() ? 0 : mergedRows.get(0).size())
|
||||
.numberOfRows(mergedRows.size())
|
||||
.build();
|
||||
|
||||
pages.forEach(page -> addTableToPage(page, parentNode, table));
|
||||
|
||||
@ -109,13 +113,13 @@ public class TableNodeFactory {
|
||||
if (cell.getTextBlocks().isEmpty()) {
|
||||
tableCell.setLeafTextBlock(context.getTextBlockFactory().emptyTextBlock(tableNode, context, page));
|
||||
} else if (cell.getTextBlocks().size() == 1) {
|
||||
textBlock = context.getTextBlockFactory().buildAtomicTextBlock(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page);
|
||||
textBlock = context.getTextBlockFactory().fromContext(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page);
|
||||
tableCell.setLeafTextBlock(textBlock);
|
||||
} else if (firstTextBlockIsHeadline(cell)) {
|
||||
SectionNodeFactory.addSection(tableCell, cell.getTextBlocks().stream().map(tb -> (AbstractPageBlock) tb).toList(), emptyList(), context);
|
||||
} else if (cellAreaIsSmallerThanPageAreaTimesThreshold(cell, page)) {
|
||||
List<TextPositionSequence> sequences = TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(cell.getTextBlocks());
|
||||
textBlock = context.getTextBlockFactory().buildAtomicTextBlock(sequences, tableCell, context, page);
|
||||
textBlock = context.getTextBlockFactory().fromContext(sequences, tableCell, context, page);
|
||||
tableCell.setLeafTextBlock(textBlock);
|
||||
} else {
|
||||
cell.getTextBlocks().forEach(tb -> DocumentGraphFactory.addParagraphOrHeadline(tableCell, tb, context, emptyList()));
|
||||
|
||||
@ -17,14 +17,14 @@ public class TextBlockFactory {
|
||||
long textBlockIdx;
|
||||
|
||||
|
||||
public AtomicTextBlock buildAtomicTextBlock(List<TextPositionSequence> sequences, SemanticNode parent, DocumentGraphFactory.Context context, Page page) {
|
||||
public AtomicTextBlock fromContext(List<TextPositionSequence> sequences, SemanticNode parent, DocumentGraphFactory.Context context, Page page) {
|
||||
|
||||
Integer numberOnPage = context.getAndIncrementTextBlockNumberOnPage(page);
|
||||
return buildAtomicTextBlock(sequences, parent, numberOnPage, page);
|
||||
return fromNumberOnPage(sequences, parent, numberOnPage, page);
|
||||
}
|
||||
|
||||
|
||||
public AtomicTextBlock buildAtomicTextBlock(List<TextPositionSequence> sequences, SemanticNode parent, Integer numberOnPage, Page page) {
|
||||
public AtomicTextBlock fromNumberOnPage(List<TextPositionSequence> sequences, SemanticNode parent, Integer numberOnPage, Page page) {
|
||||
|
||||
SearchTextWithTextPositionDto searchTextWithTextPositionDto = SearchTextWithTextPositionFactory.buildSearchTextToTextPositionModel(sequences);
|
||||
int offset = stringOffset;
|
||||
|
||||
@ -4,18 +4,42 @@ import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
public record MatchedRule(RuleIdentifier ruleIdentifier, String reason, String legalBasis, boolean applied, Set<RedactionEntity> references) implements Comparable<MatchedRule> {
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Getter
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@EqualsAndHashCode
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
|
||||
@Builder.Default
|
||||
RuleIdentifier ruleIdentifier = RuleIdentifier.empty();
|
||||
@Builder.Default
|
||||
String reason = "";
|
||||
@Builder.Default
|
||||
String legalBasis = "";
|
||||
boolean applied;
|
||||
boolean writeValueWithLineBreaks;
|
||||
@Builder.Default
|
||||
Set<RedactionEntity> references = Collections.emptySet();
|
||||
|
||||
|
||||
public static MatchedRule empty() {
|
||||
|
||||
return new MatchedRule(RuleIdentifier.empty(), "", "", false, Collections.emptySet());
|
||||
return MatchedRule.builder().build();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(MatchedRule matchedRule) {
|
||||
|
||||
RuleIdentifier otherRuleIdentifier = matchedRule.ruleIdentifier();
|
||||
RuleIdentifier otherRuleIdentifier = matchedRule.getRuleIdentifier();
|
||||
if (!Objects.equals(ruleIdentifier.type(), otherRuleIdentifier.type())) {
|
||||
if (Objects.equals(otherRuleIdentifier.type(), "MAN")) {
|
||||
return 1;
|
||||
@ -24,10 +48,17 @@ public record MatchedRule(RuleIdentifier ruleIdentifier, String reason, String l
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (!Objects.equals(otherRuleIdentifier.unit(), ruleIdentifier().unit())) {
|
||||
if (!Objects.equals(otherRuleIdentifier.unit(), getRuleIdentifier().unit())) {
|
||||
return otherRuleIdentifier.unit() - ruleIdentifier.unit();
|
||||
}
|
||||
return otherRuleIdentifier.id() - ruleIdentifier.id();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return "MatchedRule[" + "ruleIdentifier=" + ruleIdentifier + ", " + "reason=" + reason + ", " + "legalBasis=" + legalBasis + ", " + "applied=" + applied + ", " + "writeValueWithLineBreaks=" + writeValueWithLineBreaks + ", " + "references=" + references + ']';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,104 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import lombok.NonNull;
|
||||
|
||||
public interface MatchedRuleHolder {
|
||||
|
||||
PriorityQueue<MatchedRule> getMatchedRuleList();
|
||||
|
||||
|
||||
default boolean isApplied() {
|
||||
|
||||
return getMatchedRule().isApplied();
|
||||
}
|
||||
|
||||
|
||||
default Set<RedactionEntity> getReferences() {
|
||||
|
||||
return getMatchedRule().getReferences();
|
||||
}
|
||||
|
||||
|
||||
default void apply(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).legalBasis(legalBasis).applied(true).build());
|
||||
}
|
||||
|
||||
|
||||
default void applyWithLineBreaks(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
getMatchedRuleList().add(MatchedRule.builder()
|
||||
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.reason(reason)
|
||||
.legalBasis(legalBasis)
|
||||
.applied(true)
|
||||
.writeValueWithLineBreaks(true)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection<RedactionEntity> references) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
getMatchedRuleList().add(MatchedRule.builder()
|
||||
.ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.reason(reason)
|
||||
.legalBasis(legalBasis)
|
||||
.applied(true)
|
||||
.references(new HashSet<>(references))
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
default void skip(@NonNull String ruleIdentifier, String reason) {
|
||||
|
||||
getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build());
|
||||
}
|
||||
|
||||
|
||||
default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection<RedactionEntity> references) {
|
||||
|
||||
getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).references(new HashSet<>(references)).build());
|
||||
}
|
||||
|
||||
|
||||
default void addMatchedRule(MatchedRule matchedRule) {
|
||||
|
||||
getMatchedRuleList().add(matchedRule);
|
||||
}
|
||||
|
||||
|
||||
default void addMatchedRules(Collection<MatchedRule> matchedRules) {
|
||||
|
||||
getMatchedRuleList().addAll(matchedRules);
|
||||
}
|
||||
|
||||
|
||||
default int getMatchedRuleUnit() {
|
||||
|
||||
return getMatchedRule().getRuleIdentifier().unit();
|
||||
}
|
||||
|
||||
|
||||
default MatchedRule getMatchedRule() {
|
||||
|
||||
if (getMatchedRuleList().isEmpty()) {
|
||||
return MatchedRule.empty();
|
||||
}
|
||||
return getMatchedRuleList().peek();
|
||||
}
|
||||
|
||||
}
|
||||
@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.e
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
@ -22,7 +21,6 @@ import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NonNull;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@ -30,7 +28,7 @@ import lombok.experimental.FieldDefaults;
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class RedactionEntity {
|
||||
public class RedactionEntity implements MatchedRuleHolder {
|
||||
|
||||
// initial values
|
||||
@EqualsAndHashCode.Include
|
||||
@ -55,7 +53,6 @@ public class RedactionEntity {
|
||||
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
|
||||
// inferred on graph insertion
|
||||
@EqualsAndHashCode.Include
|
||||
String value;
|
||||
String textBefore;
|
||||
String textAfter;
|
||||
@ -73,18 +70,6 @@ public class RedactionEntity {
|
||||
}
|
||||
|
||||
|
||||
public boolean isApplied() {
|
||||
|
||||
return getMatchedRule().applied();
|
||||
}
|
||||
|
||||
|
||||
public Set<RedactionEntity> getReferences() {
|
||||
|
||||
return getMatchedRule().references();
|
||||
}
|
||||
|
||||
|
||||
public boolean occursInNodeOfType(Class<? extends SemanticNode> clazz) {
|
||||
|
||||
return intersectingNodes.stream().anyMatch(clazz::isInstance);
|
||||
@ -121,6 +106,12 @@ public class RedactionEntity {
|
||||
}
|
||||
|
||||
|
||||
public String getValueWithLineBreaks() {
|
||||
|
||||
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getBoundary());
|
||||
}
|
||||
|
||||
|
||||
public void removeFromGraph() {
|
||||
|
||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||
@ -129,67 +120,21 @@ public class RedactionEntity {
|
||||
deepestFullyContainingNode = null;
|
||||
pages = new HashSet<>();
|
||||
removed = true;
|
||||
}
|
||||
|
||||
|
||||
public void remove() {
|
||||
|
||||
removed = true;
|
||||
}
|
||||
|
||||
|
||||
public void ignore() {
|
||||
|
||||
ignored = true;
|
||||
}
|
||||
|
||||
|
||||
public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet()));
|
||||
}
|
||||
|
||||
|
||||
public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection<RedactionEntity> references) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references)));
|
||||
}
|
||||
|
||||
|
||||
public void skip(@NonNull String ruleIdentifier, String comment) {
|
||||
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet()));
|
||||
}
|
||||
|
||||
|
||||
public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection<RedactionEntity> references) {
|
||||
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, new HashSet<>(references)));
|
||||
}
|
||||
|
||||
|
||||
public void addMatchedRule(MatchedRule matchedRule) {
|
||||
|
||||
matchedRuleList.add(matchedRule);
|
||||
}
|
||||
|
||||
|
||||
public void addMatchedRules(Collection<MatchedRule> matchedRules) {
|
||||
|
||||
matchedRuleList.addAll(matchedRules);
|
||||
}
|
||||
|
||||
|
||||
public int getMatchedRuleUnit() {
|
||||
|
||||
return getMatchedRule().ruleIdentifier().unit();
|
||||
}
|
||||
|
||||
|
||||
public MatchedRule getMatchedRule() {
|
||||
|
||||
if (matchedRuleList.isEmpty()) {
|
||||
return MatchedRule.empty();
|
||||
}
|
||||
return matchedRuleList.peek();
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionPosition> getRedactionPositionsPerPage() {
|
||||
|
||||
if (redactionPositionsPerPage == null || redactionPositionsPerPage.isEmpty()) {
|
||||
|
||||
@ -35,6 +35,8 @@ public class Document implements GenericSemanticNode {
|
||||
TextBlock textBlock;
|
||||
@Builder.Default
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
@Builder.Default
|
||||
static final SectionIdentifier sectionIdentifier = SectionIdentifier.document();
|
||||
|
||||
|
||||
@Override
|
||||
@ -79,6 +81,13 @@ public class Document implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
return sectionIdentifier;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Headline getHeadline() {
|
||||
|
||||
|
||||
@ -23,6 +23,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Footer implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
|
||||
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
@ -55,6 +58,13 @@ public class Footer implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
return sectionIdentifier;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
|
||||
@ -23,6 +23,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Header implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
|
||||
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
@ -55,6 +58,13 @@ public class Header implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
return sectionIdentifier;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
|
||||
@ -26,6 +26,7 @@ public class Headline implements GenericSemanticNode {
|
||||
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
SectionIdentifier sectionIdentifier;
|
||||
|
||||
@EqualsAndHashCode.Exclude
|
||||
DocumentTree documentTree;
|
||||
@ -70,12 +71,24 @@ public class Headline implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
if (sectionIdentifier == null) {
|
||||
sectionIdentifier = SectionIdentifier.fromSearchText(getTextBlock().getSearchText());
|
||||
}
|
||||
return sectionIdentifier;
|
||||
}
|
||||
|
||||
|
||||
public static Headline empty() {
|
||||
|
||||
return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build();
|
||||
}
|
||||
|
||||
public boolean hasParagraphs(){
|
||||
|
||||
public boolean hasParagraphs() {
|
||||
|
||||
return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent();
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@ -12,8 +11,8 @@ import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRuleHolder;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RuleIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
@ -23,7 +22,6 @@ import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.NonNull;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@ -31,7 +29,7 @@ import lombok.experimental.FieldDefaults;
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Image implements GenericSemanticNode {
|
||||
public class Image implements GenericSemanticNode, MatchedRuleHolder {
|
||||
|
||||
List<Integer> treeId;
|
||||
String id;
|
||||
@ -40,6 +38,7 @@ public class Image implements GenericSemanticNode {
|
||||
boolean transparent;
|
||||
Rectangle2D position;
|
||||
|
||||
boolean removed;
|
||||
boolean ignored;
|
||||
|
||||
@Builder.Default
|
||||
@ -56,72 +55,21 @@ public class Image implements GenericSemanticNode {
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
|
||||
|
||||
public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) {
|
||||
public boolean isActive() {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet()));
|
||||
return !removed && !ignored;
|
||||
}
|
||||
|
||||
|
||||
public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection<RedactionEntity> references) {
|
||||
public void ignore() {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
|
||||
}
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references)));
|
||||
ignored = true;
|
||||
}
|
||||
|
||||
|
||||
public void skip(@NonNull String ruleIdentifier, String comment) {
|
||||
public void remove() {
|
||||
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet()));
|
||||
}
|
||||
|
||||
|
||||
public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection<RedactionEntity> references) {
|
||||
|
||||
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, new HashSet<>(references)));
|
||||
}
|
||||
|
||||
|
||||
public void addMatchedRule(MatchedRule matchedRule) {
|
||||
|
||||
matchedRuleList.add(matchedRule);
|
||||
}
|
||||
|
||||
|
||||
public void addMatchedRules(Collection<MatchedRule> matchedRules) {
|
||||
|
||||
matchedRuleList.addAll(matchedRules);
|
||||
}
|
||||
|
||||
|
||||
public boolean isApplied() {
|
||||
|
||||
return getMatchedRule().applied();
|
||||
}
|
||||
|
||||
|
||||
public Set<RedactionEntity> getReferences() {
|
||||
|
||||
return getMatchedRule().references();
|
||||
}
|
||||
|
||||
|
||||
public int getMatchedRuleUnit() {
|
||||
|
||||
return getMatchedRule().ruleIdentifier().unit();
|
||||
}
|
||||
|
||||
|
||||
public MatchedRule getMatchedRule() {
|
||||
|
||||
if (matchedRuleList.isEmpty()) {
|
||||
return MatchedRule.empty();
|
||||
}
|
||||
return matchedRuleList.peek();
|
||||
removed = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -8,7 +8,6 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -49,6 +48,13 @@ public class Section implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
return getHeadline().getSectionIdentifier();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextBlock getTextBlock() {
|
||||
|
||||
@ -75,19 +81,22 @@ public class Section implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
public boolean anyHeadlineContainsString(String value){
|
||||
public boolean anyHeadlineContainsString(String value) {
|
||||
|
||||
return streamChildrenOfType(NodeType.HEADLINE)//
|
||||
.map(node -> (Headline) node).anyMatch(h -> h.containsString(value));
|
||||
}
|
||||
|
||||
|
||||
public boolean anyHeadlineContainsStringIgnoreCase(String value){
|
||||
public boolean anyHeadlineContainsStringIgnoreCase(String value) {
|
||||
|
||||
return streamChildrenOfType(NodeType.HEADLINE)//
|
||||
.map(node -> (Headline) node).anyMatch(h -> h.containsStringIgnoreCase(value));
|
||||
}
|
||||
|
||||
|
||||
public boolean hasParagraphs(){
|
||||
public boolean hasParagraphs() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent();
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,123 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class SectionIdentifier {
|
||||
|
||||
static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
|
||||
|
||||
private enum Format {
|
||||
EMPTY,
|
||||
NUMERICAL,
|
||||
DOCUMENT
|
||||
}
|
||||
|
||||
Format format;
|
||||
String identifierString;
|
||||
List<Integer> identifiers;
|
||||
boolean asChild;
|
||||
|
||||
|
||||
public static SectionIdentifier fromSearchText(String headline) {
|
||||
|
||||
if (headline == null || headline.isEmpty() || headline.isBlank()) {
|
||||
return SectionIdentifier.empty();
|
||||
}
|
||||
|
||||
Matcher numericalIdentifierMatcher = numericalIdentifierPattern.matcher(headline);
|
||||
if (numericalIdentifierMatcher.find()) {
|
||||
return buildNumericalSectionIdentifier(headline, numericalIdentifierMatcher);
|
||||
}
|
||||
// more formats here
|
||||
return SectionIdentifier.empty();
|
||||
}
|
||||
|
||||
|
||||
public static SectionIdentifier asChildOf(SectionIdentifier sectionIdentifier) {
|
||||
|
||||
return new SectionIdentifier(sectionIdentifier.format, sectionIdentifier.toString(), sectionIdentifier.identifiers, true);
|
||||
}
|
||||
|
||||
|
||||
public static SectionIdentifier document() {
|
||||
|
||||
return new SectionIdentifier(Format.DOCUMENT, "document", Collections.emptyList(), false);
|
||||
}
|
||||
|
||||
|
||||
public static SectionIdentifier empty() {
|
||||
|
||||
return new SectionIdentifier(Format.EMPTY, "empty", Collections.emptyList(), false);
|
||||
}
|
||||
|
||||
|
||||
private static SectionIdentifier buildNumericalSectionIdentifier(String headline, Matcher numericalIdentifierMatcher) {
|
||||
|
||||
String identifierString = headline.substring(numericalIdentifierMatcher.start(), numericalIdentifierMatcher.end());
|
||||
List<Integer> identifiers = new LinkedList<>();
|
||||
for (int i = 1; i <= 4; i++) {
|
||||
String numericalIdentifier = numericalIdentifierMatcher.group(i);
|
||||
if (numericalIdentifier == null || numericalIdentifier.equals("0") || numericalIdentifier.isEmpty() || numericalIdentifier.isBlank()) {
|
||||
break;
|
||||
}
|
||||
identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
|
||||
}
|
||||
return new SectionIdentifier(Format.NUMERICAL, identifierString, identifiers.stream().toList(), false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the current section is the parent of the given section.
|
||||
*
|
||||
* @param sectionIdentifier The section identifier to compare against.
|
||||
* @return true if the current section is the parent of the given section, false otherwise.
|
||||
*/
|
||||
public boolean isParentOf(SectionIdentifier sectionIdentifier) {
|
||||
|
||||
if (this.format.equals(Format.EMPTY)) {
|
||||
return false;
|
||||
}
|
||||
if (this.format.equals(Format.DOCUMENT)) {
|
||||
return true;
|
||||
}
|
||||
if (!this.format.equals(sectionIdentifier.format)) {
|
||||
return false;
|
||||
}
|
||||
if (this.identifiers.size() >= sectionIdentifier.identifiers.size() && !(this.identifiers.size() == sectionIdentifier.identifiers.size() && sectionIdentifier.asChild)) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < this.identifiers.size(); i++) {
|
||||
if (!this.identifiers.get(i).equals(sectionIdentifier.identifiers.get(i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public boolean isChildOf(SectionIdentifier sectionIdentifier) {
|
||||
|
||||
if (this.format.equals(Format.DOCUMENT) || this.format.equals(Format.EMPTY)) {
|
||||
return false;
|
||||
}
|
||||
return sectionIdentifier.isParentOf(this);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return identifierString;
|
||||
}
|
||||
|
||||
}
|
||||
@ -115,6 +115,17 @@ public interface SemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a SectionIdentifier, such that it acts as a child of the first Headline associated with this SemanticNode.
|
||||
*
|
||||
* @return The SectionIdentifier from the first Headline.
|
||||
*/
|
||||
default SectionIdentifier getSectionIdentifier() {
|
||||
|
||||
return SectionIdentifier.asChildOf(getHeadline().getSectionIdentifier());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if its TreeId has a length greater than zero.
|
||||
*
|
||||
|
||||
@ -9,6 +9,8 @@ import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicPositionBlockData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicTextBlockData;
|
||||
@ -200,6 +202,38 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String subSequenceWithLineBreaks(Boundary boundary) {
|
||||
|
||||
if (boundary.length() == 0 || !getBoundary().contains(boundary)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
CharSequence subSequence = subSequence(boundary);
|
||||
Set<Integer> lbInBoundary = lineBreaks.stream().filter(boundary::contains).collect(Collectors.toSet());
|
||||
if (boundary.end() == getBoundary().end()) {
|
||||
lbInBoundary.add(getBoundary().length());
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < subSequence.length(); i++) {
|
||||
char character = subSequence.charAt(i);
|
||||
if (lbInBoundary.contains(i + 1)) {
|
||||
// always plus one, due to the linebreaks being an exclusive end index
|
||||
if (!Character.isWhitespace(character)) {
|
||||
lbInBoundary.remove(i + 1);
|
||||
lbInBoundary.add(i + 2);
|
||||
sb.append(character);
|
||||
continue;
|
||||
}
|
||||
sb.append("\n");
|
||||
} else {
|
||||
sb.append(character);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
private List<Integer> getAllLineBreaksInBoundary(Boundary boundary) {
|
||||
|
||||
return getLineBreaks().stream().map(linebreak -> linebreak + this.boundary.start()).filter(boundary::contains).toList();
|
||||
|
||||
@ -172,6 +172,34 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String subSequenceWithLineBreaks(Boundary boundary) {
|
||||
|
||||
if (boundary.length() == 0 || !getBoundary().contains(boundary)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(boundary);
|
||||
|
||||
if (textBlocks.size() == 1) {
|
||||
return textBlocks.get(0).subSequenceWithLineBreaks(boundary);
|
||||
}
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
AtomicTextBlock firstTextBlock = textBlocks.get(0);
|
||||
sb.append(firstTextBlock.subSequenceWithLineBreaks(new Boundary(boundary.start(), firstTextBlock.getBoundary().end())));
|
||||
|
||||
for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
|
||||
sb.append(textBlock.searchTextWithLineBreaks());
|
||||
}
|
||||
|
||||
var lastTextBlock = textBlocks.get(textBlocks.size() - 1);
|
||||
sb.append(lastTextBlock.subSequenceWithLineBreaks(new Boundary(lastTextBlock.getBoundary().start(), boundary.end())));
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
private Map<Page, List<Rectangle2D>> mergeEntityPositionsWithSamePageNode(Map<Page, List<Rectangle2D>> map1, Map<Page, List<Rectangle2D>> map2) {
|
||||
|
||||
Map<Page, List<Rectangle2D>> mergedMap = new HashMap<>(map1);
|
||||
|
||||
@ -42,9 +42,18 @@ public interface TextBlock extends CharSequence {
|
||||
Map<Page, List<Rectangle2D>> getPositionsPerPage(Boundary stringBoundary);
|
||||
|
||||
|
||||
/**
 * Returns the text within the given boundary with line breaks included.
 * NOTE(review): the AtomicTextBlock and ConcatenatedTextBlock implementations return an empty String when the
 * boundary is empty or not contained in the block, and render line breaks as "\n" - confirm this is the
 * contract expected from every implementation.
 *
 * @param boundary The boundary of the requested text.
 * @return The text within the boundary, including line breaks.
 */
String subSequenceWithLineBreaks(Boundary boundary);
|
||||
|
||||
|
||||
int numberOfLines();
|
||||
|
||||
|
||||
default String searchTextWithLineBreaks() {
|
||||
|
||||
return subSequenceWithLineBreaks(getBoundary());
|
||||
}
|
||||
|
||||
|
||||
default int indexOf(String searchTerm) {
|
||||
|
||||
return indexOf(searchTerm, getBoundary().start());
|
||||
|
||||
@ -23,6 +23,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
|
||||
@ -90,7 +91,9 @@ public class EntityCreationService {
|
||||
return entityBoundaries.stream()
|
||||
.map(boundary -> boundary.trim(node.getTextBlock()))
|
||||
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -129,7 +132,9 @@ public class EntityCreationService {
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
|
||||
.stream()
|
||||
.filter(boundary -> isValidEntityBoundary(node.getTextBlock(), boundary))
|
||||
.map(bounds -> byBoundary(bounds, type, entityType, node));
|
||||
.map(bounds -> byBoundary(bounds, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -141,7 +146,9 @@ public class EntityCreationService {
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -152,19 +159,24 @@ public class EntityCreationService {
|
||||
.stream()
|
||||
.map(boundary -> toLineAfterBoundary(textBlock, boundary))
|
||||
.filter(boundary -> isValidEntityBoundary(textBlock, boundary))
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLinebreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLinebreaks(regexPattern, type, entityType, 0, node);
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLinebreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLinebreaksIgnoreCase(regexPattern, type, entityType, 0, node);
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegex(regexPattern, type, entityType, 0, node);
|
||||
@ -177,20 +189,33 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLinebreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegexWithLinebreaks(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegexWithLinebreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegexWithLinebreaksIgnoreCase(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
return RedactionSearchUtility.findBoundariesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
return RedactionSearchUtility.findBoundariesByRegex(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -198,13 +223,25 @@ public class EntityCreationService {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByRegexIgnoreCase(regexPattern, group, node.getTextBlock())
|
||||
.stream()
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock()).stream().map(boundary -> byBoundary(boundary, type, entityType, node));
|
||||
return RedactionSearchUtility.findBoundariesByString(keyword, node.getTextBlock())
|
||||
.stream()
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(semanticNode -> bySemanticNode(semanticNode, type, entityType)).filter(Optional::isPresent).map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
@ -218,18 +255,18 @@ public class EntityCreationService {
|
||||
if (!isValidEntityBoundary(node.getTextBlock(), boundary)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(byBoundary(boundary, type, entityType, node));
|
||||
return byBoundary(boundary, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
public Optional<RedactionEntity> byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
|
||||
int expandedStart = getExpandedStartByRegex(entity, regexPattern);
|
||||
return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
public Optional<RedactionEntity> bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
|
||||
int expandedEnd = getExpandedEndByRegex(entity, regexPattern);
|
||||
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
|
||||
@ -246,7 +283,32 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
|
||||
/**
|
||||
* Creates a redaction entity based on the given boundary, type, entity type, and semantic node.
|
||||
* If the document already contains an equal redaction entity, then en empty Optional is returned.
|
||||
*
|
||||
* @param boundary The boundary of the redaction entity.
|
||||
* @param type The type of the redaction entity.
|
||||
* @param entityType The entity type of the redaction entity.
|
||||
* @param node The semantic node to associate with the redaction entity.
|
||||
* @return An Optional containing the redaction entity, or an empty Optional if the entity already exists.
|
||||
*/
|
||||
public Optional<RedactionEntity> byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (!node.getBoundary().contains(boundary)) {
|
||||
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", boundary, node.getBoundary(), node));
|
||||
}
|
||||
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
|
||||
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
|
||||
if (node.getEntities().contains(entity)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
addEntityToGraph(entity, node);
|
||||
return Optional.of(entity);
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity forceByBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
Boundary trimmedBoundary = boundary.trim(node.getTextBlock());
|
||||
RedactionEntity entity = RedactionEntity.initialEntityNode(trimmedBoundary, type, entityType);
|
||||
@ -281,19 +343,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
|
||||
public Optional<RedactionEntity> byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
RedactionEntity entity = byBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode);
|
||||
entity.addEngine(Engine.NER);
|
||||
return entity;
|
||||
return byBoundary(nerEntity.boundary(), nerEntity.type(), entityType, semanticNode).stream().peek(entity -> entity.addEngine(Engine.NER)).findAny();
|
||||
}
|
||||
|
||||
|
||||
public RedactionEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
public Optional<RedactionEntity> byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
RedactionEntity entity = byBoundary(nerEntity.boundary(), type, entityType, semanticNode);
|
||||
entity.addEngine(Engine.NER);
|
||||
return entity;
|
||||
return byBoundary(nerEntity.boundary(), type, entityType, semanticNode).stream().peek(entity -> entity.addEngine(Engine.NER)).findAny();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -116,7 +116,7 @@ public class RectangleTransformations {
|
||||
@Override
|
||||
public BiConsumer<BBox, Rectangle2D> accumulator() {
|
||||
|
||||
return (bb, rect) -> bb.addRectangle(rect.getMinX(), rect.getMinY(), rect.getMaxX(), rect.getMaxY());
|
||||
return BBox::addRectangle;
|
||||
}
|
||||
|
||||
|
||||
@ -154,7 +154,12 @@ public class RectangleTransformations {
|
||||
Double upperRightY;
|
||||
|
||||
|
||||
public void addRectangle(double lowerLeftX, double lowerLeftY, double upperRightX, double upperRightY) {
|
||||
public void addRectangle(Rectangle2D rectangle2D) {
|
||||
|
||||
double lowerLeftX = Math.min(rectangle2D.getMinX(), rectangle2D.getMaxX());
|
||||
double lowerLeftY = Math.min(rectangle2D.getMinY(), rectangle2D.getMaxY());
|
||||
double upperRightX = Math.max(rectangle2D.getMinX(), rectangle2D.getMaxX());
|
||||
double upperRightY = Math.max(rectangle2D.getMinY(), rectangle2D.getMaxY());
|
||||
|
||||
if (this.lowerLeftX == null) {
|
||||
this.lowerLeftX = lowerLeftX;
|
||||
|
||||
@ -102,17 +102,17 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByRegexWithLinebreaks(String regexPattern, int group, TextBlock textBlock) {
|
||||
public static List<Boundary> findBoundariesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false);
|
||||
return getBoundariesByPatternWithLinebreaks(textBlock, group, pattern);
|
||||
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
|
||||
}
|
||||
|
||||
|
||||
public static List<Boundary> findBoundariesByRegexWithLinebreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
public static List<Boundary> findBoundariesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true);
|
||||
return getBoundariesByPatternWithLinebreaks(textBlock, group, pattern);
|
||||
return getBoundariesByPatternWithLineBreaks(textBlock, group, pattern);
|
||||
}
|
||||
|
||||
|
||||
@ -134,21 +134,10 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
private static List<Boundary> getBoundariesByPatternWithLinebreaks(TextBlock textBlock, int group, Pattern pattern) {
|
||||
private static List<Boundary> getBoundariesByPatternWithLineBreaks(TextBlock textBlock, int group, Pattern pattern) {
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
textBlock.getAtomicTextBlocks().forEach(at -> {
|
||||
if (at.numberOfLines() > 1) {
|
||||
for (int i = 0; i < at.numberOfLines(); i++) {
|
||||
stringBuilder.append(at.getLine(i));
|
||||
stringBuilder.setCharAt(stringBuilder.length() - 1, '\n');
|
||||
}
|
||||
} else {
|
||||
stringBuilder.append(at.getSearchText()).setCharAt(stringBuilder.length() - 1, '\n');
|
||||
}
|
||||
});
|
||||
|
||||
Matcher matcher = pattern.matcher(stringBuilder.toString());
|
||||
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
|
||||
Matcher matcher = pattern.matcher(searchTextWithLineBreaks);
|
||||
List<Boundary> boundaries = new LinkedList<>();
|
||||
while (matcher.find()) {
|
||||
boundaries.add(new Boundary(matcher.start(group) + textBlock.getBoundary().start(), matcher.end(group) + textBlock.getBoundary().start()));
|
||||
|
||||
@ -84,6 +84,7 @@ public class AnalyzeService {
|
||||
@Timed("redactmanager_analyzeDocumentStructure")
|
||||
public AnalyzeResult analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
|
||||
|
||||
log.info("Starting Structure Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
ClassificationDocument classifiedDoc;
|
||||
@ -92,25 +93,29 @@ public class AnalyzeService {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
FileType.ORIGIN));
|
||||
|
||||
log.info("Loaded PDF for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
Map<Integer, List<ClassifiedImage>> pdfImages = null;
|
||||
if (redactionServiceSettings.isEnableImageClassification()) {
|
||||
pdfImages = imageServiceResponseAdapter.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded image service response for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
}
|
||||
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), storedObjectStream, pdfImages);
|
||||
log.info("Parsed document for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
|
||||
Document document = DocumentGraphFactory.buildDocumentGraph(classifiedDoc);
|
||||
log.info("Built Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
SectionGrid sectionGrid = sectionGridCreatorService.createSectionGrid(document);
|
||||
log.info("Built section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
log.info("Store document graph, text, simplified text, and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, DocumentData.fromDocument(document));
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SIMPLIFIED_TEXT, toSimplifiedText(document));
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, sectionGrid);
|
||||
log.info("Stored document graph, text, simplified text, and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
return AnalyzeResult.builder()
|
||||
.dossierId(analyzeRequest.getDossierId())
|
||||
@ -125,21 +130,27 @@ public class AnalyzeService {
|
||||
@Timed("redactmanager_analyze")
|
||||
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
log.info("Starting Analysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
long startTime = System.currentTimeMillis();
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document);
|
||||
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
log.info("Updated Dictionary for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
log.info("Updated Rules to Version {} for file {} in dossier {}", rulesVersion, analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
log.debug("Starting Dictionary Search");
|
||||
long dictSearchStart = System.currentTimeMillis();
|
||||
entityRedactionService.addDictionaryEntities(dictionary, document);
|
||||
log.debug("Finished Dictionary Search in {} ms", System.currentTimeMillis() - dictSearchStart);
|
||||
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
Set<FileAttribute> addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, kieContainer, analyzeRequest, nerEntities);
|
||||
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId());
|
||||
|
||||
@ -168,10 +179,12 @@ public class AnalyzeService {
|
||||
@SneakyThrows
|
||||
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||
|
||||
log.info("Starting Reanalysis for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
long startTime = System.currentTimeMillis();
|
||||
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded previous redaction log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
|
||||
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
// not yet ready for reanalysis
|
||||
if (previousRedactionLog == null || document == null || document.getNumberOfPages() == 0) {
|
||||
return analyze(analyzeRequest);
|
||||
@ -183,6 +196,7 @@ public class AnalyzeService {
|
||||
|
||||
Set<Integer> sectionsToReanalyseIds = getSectionsToReanalyseIds(analyzeRequest, previousRedactionLog, document, dictionaryIncrement);
|
||||
List<SemanticNode> sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds);
|
||||
log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
if (sectionsToReAnalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest,
|
||||
@ -195,15 +209,16 @@ public class AnalyzeService {
|
||||
}
|
||||
|
||||
NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds);
|
||||
log.info("Reanalyze {} sections with {} Ner Entities", sectionsToReAnalyse.size(), nerEntities.getNerEntityList().size());
|
||||
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
log.info("Updated Rules for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
|
||||
sectionsToReAnalyse.forEach(node -> entityRedactionService.addDictionaryEntities(dictionary, node));
|
||||
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
long ruleStart = System.currentTimeMillis();
|
||||
Set<FileAttribute> addedFileAttributes = entityRedactionService.addRuleEntities(dictionary, document, sectionsToReAnalyse, kieContainer, analyzeRequest, nerEntities);
|
||||
log.info("Rule execution took {} ms", System.currentTimeMillis() - ruleStart);
|
||||
log.info("Finished Rule Execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
List<RedactionLogEntry> newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, analyzeRequest.getDossierTemplateId());
|
||||
|
||||
@ -244,7 +259,10 @@ public class AnalyzeService {
|
||||
analyzeRequest.getFileId(),
|
||||
redactionLog,
|
||||
analyzeRequest.getAnalysisNumber());
|
||||
log.info("Created Redaction Log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange.getRedactionLog());
|
||||
log.info("Stored Redaction Log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
|
||||
|
||||
@ -252,11 +252,11 @@ public class DictionaryService {
|
||||
falsePositives.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
|
||||
falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
|
||||
}
|
||||
log.info("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
|
||||
log.debug("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
|
||||
entries.size(),
|
||||
falsePositives.size(),
|
||||
falseRecommendations.size(),
|
||||
type.getType());
|
||||
typeId);
|
||||
return new DictionaryEntries(entries, falsePositives, falseRecommendations);
|
||||
}
|
||||
|
||||
@ -304,7 +304,8 @@ public class DictionaryService {
|
||||
if (dossierDictionaryExists(dossierId)) {
|
||||
var dossierRepresentation = getDossierDictionary(dossierId);
|
||||
var dossierDictionaries = dossierRepresentation.getDictionary();
|
||||
mergedDictionaries = convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries), convertDictionaryModel(dossierDictionaries)));
|
||||
mergedDictionaries = convertCommonsDictionaryModel(dictionaryMergeService.getMergedDictionary(convertDictionaryModel(dossierTemplateDictionaries),
|
||||
convertDictionaryModel(dossierDictionaries)));
|
||||
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
|
||||
} else {
|
||||
mergedDictionaries = new ArrayList<>();
|
||||
@ -367,23 +368,37 @@ public class DictionaryService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private List<CommonsDictionaryModel> convertDictionaryModel(List<DictionaryModel> dictionaries) {
|
||||
return dictionaries.stream().map(d -> CommonsDictionaryModel.builder()
|
||||
.type(d.getType())
|
||||
.rank(d.getRank())
|
||||
.color(d.getColor())
|
||||
.caseInsensitive(d.isCaseInsensitive())
|
||||
.hint(d.isHint())
|
||||
.isDossierDictionary(d.isDossierDictionary())
|
||||
.entries(d.getEntries())
|
||||
.falsePositives(d.getFalsePositives())
|
||||
.falseRecommendations(d.getFalseRecommendations())
|
||||
.build()).collect(Collectors.toList());
|
||||
|
||||
return dictionaries.stream()
|
||||
.map(d -> CommonsDictionaryModel.builder()
|
||||
.type(d.getType())
|
||||
.rank(d.getRank())
|
||||
.color(d.getColor())
|
||||
.caseInsensitive(d.isCaseInsensitive())
|
||||
.hint(d.isHint())
|
||||
.isDossierDictionary(d.isDossierDictionary())
|
||||
.entries(d.getEntries())
|
||||
.falsePositives(d.getFalsePositives())
|
||||
.falseRecommendations(d.getFalseRecommendations())
|
||||
.build())
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private List<DictionaryModel> convertCommonsDictionaryModel(List<CommonsDictionaryModel> commonsDictionaries) {
|
||||
return commonsDictionaries.stream().map(cd ->
|
||||
new DictionaryModel(cd.getType(), cd.getRank(), cd.getColor(), cd.isCaseInsensitive(), cd.isHint(), cd.getEntries(), cd.getFalsePositives(), cd.getFalseRecommendations(), cd.isDossierDictionary()))
|
||||
|
||||
return commonsDictionaries.stream()
|
||||
.map(cd -> new DictionaryModel(cd.getType(),
|
||||
cd.getRank(),
|
||||
cd.getColor(),
|
||||
cd.isCaseInsensitive(),
|
||||
cd.isHint(),
|
||||
cd.getEntries(),
|
||||
cd.getFalsePositives(),
|
||||
cd.getFalseRecommendations(),
|
||||
cd.isDossierDictionary()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@ -23,7 +23,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribu
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
|
||||
@ -91,6 +90,7 @@ public class DroolsExecutionService {
|
||||
kieSession.setGlobal("dictionary", dictionary);
|
||||
kieSession.setGlobal("nerEntitiesAdapter", nerEntitiesAdapter);
|
||||
|
||||
kieSession.insert(document);
|
||||
document.getEntities().forEach(kieSession::insert);
|
||||
sectionsToAnalyze.forEach(kieSession::insert);
|
||||
sectionsToAnalyze.stream().flatMap(SemanticNode::streamAllSubNodes).forEach(kieSession::insert);
|
||||
|
||||
@ -75,7 +75,7 @@ public class ManualRedactionSurroundingTextService {
|
||||
|
||||
Set<RedactionEntity> entities = RedactionSearchUtility.findBoundariesByString(value, node.getTextBlock())
|
||||
.stream()
|
||||
.map(boundary -> entityCreationService.byBoundary(boundary, "searchHelper", EntityType.RECOMMENDATION, node))
|
||||
.map(boundary -> entityCreationService.forceByBoundary(boundary, "searchHelper", EntityType.RECOMMENDATION, node))
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
RedactionEntity correctEntity = getEntityOnCorrectPosition(entities, toFindPositions);
|
||||
|
||||
@ -35,8 +35,9 @@ public class RedactionLogCreatorService {
|
||||
document.getEntities()
|
||||
.stream()
|
||||
.filter(RedactionLogCreatorService::isEntityOrRecommendationType)
|
||||
.filter(entity -> !entity.isRemoved())
|
||||
.forEach(entityNode -> entries.addAll(toRedactionLogEntries(entityNode, processedIds, dossierTemplateId)));
|
||||
document.streamAllImages().forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId)));
|
||||
document.streamAllImages().filter(image -> !image.isRemoved()).forEach(imageNode -> entries.add(createRedactionLogEntry(imageNode, dossierTemplateId)));
|
||||
return entries;
|
||||
}
|
||||
|
||||
@ -80,17 +81,14 @@ public class RedactionLogCreatorService {
|
||||
private RedactionLogEntry createRedactionLogEntry(RedactionEntity entity, String dossierTemplateId) {
|
||||
|
||||
Set<String> referenceIds = new HashSet<>();
|
||||
entity.getReferences()
|
||||
.stream()
|
||||
.filter(redactionEntity -> !redactionEntity.isRemoved() && !redactionEntity.isIgnored())
|
||||
.forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId())));
|
||||
entity.getReferences().stream().filter(RedactionEntity::isActive).forEach(ref -> ref.getRedactionPositionsPerPage().forEach(pos -> referenceIds.add(pos.getId())));
|
||||
int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 0 : entity.getDeepestFullyContainingNode().getTreeId().get(0);
|
||||
|
||||
return RedactionLogEntry.builder()
|
||||
.color(getColor(entity.getType(), dossierTemplateId, entity.isApplied()))
|
||||
.reason(entity.getMatchedRule().reason())
|
||||
.legalBasis(entity.getMatchedRule().legalBasis())
|
||||
.value(entity.getValue())
|
||||
.reason(entity.getMatchedRule().getReason())
|
||||
.legalBasis(entity.getMatchedRule().getLegalBasis())
|
||||
.value(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())
|
||||
.type(entity.getType())
|
||||
.redacted(entity.isApplied())
|
||||
.isHint(isHint(entity.getType(), dossierTemplateId))
|
||||
@ -98,7 +96,7 @@ public class RedactionLogCreatorService {
|
||||
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
|
||||
.section(entity.getDeepestFullyContainingNode().toString())
|
||||
.sectionNumber(sectionNumber)
|
||||
.matchedRule(entity.getMatchedRule().ruleIdentifier().toString())
|
||||
.matchedRule(entity.getMatchedRule().getRuleIdentifier().toString())
|
||||
.isDictionaryEntry(entity.isDictionaryEntry())
|
||||
.textAfter(entity.getTextAfter())
|
||||
.textBefore(entity.getTextBefore())
|
||||
@ -120,9 +118,9 @@ public class RedactionLogCreatorService {
|
||||
.isImage(true)
|
||||
.type(imageType)
|
||||
.redacted(image.isApplied())
|
||||
.reason(image.getMatchedRule().reason())
|
||||
.legalBasis(image.getMatchedRule().legalBasis())
|
||||
.matchedRule(image.getMatchedRule().ruleIdentifier().toString())
|
||||
.reason(image.getMatchedRule().getReason())
|
||||
.legalBasis(image.getMatchedRule().getLegalBasis())
|
||||
.matchedRule(image.getMatchedRule().getRuleIdentifier().toString())
|
||||
.isHint(dictionaryService.isHint(image.getImageType().toString(), dossierTemplateId))
|
||||
.isDictionaryEntry(false)
|
||||
.isRecommendation(false)
|
||||
|
||||
@ -55,7 +55,7 @@ class SectionFinderService {
|
||||
}
|
||||
});
|
||||
|
||||
log.info("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start);
|
||||
log.debug("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start);
|
||||
|
||||
return sectionsToReanalyse;
|
||||
}
|
||||
|
||||
@ -9,9 +9,9 @@ import lombok.experimental.UtilityClass;
|
||||
@UtilityClass
|
||||
public final class Patterns {
|
||||
|
||||
public static Map<String, Pattern> patternCache = new HashMap<>();
|
||||
public static final Map<String, Pattern> patternCache = new HashMap<>();
|
||||
|
||||
public static Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile(
|
||||
public static final Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile(
|
||||
"(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.){1,3})|(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} ){1,3})");
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,163 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.wildfly.common.Assert.assertTrue;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.FilterType;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
|
||||
public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/acceptance_rules.drl");
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void stubClients() {
|
||||
|
||||
TenantContext.setTenantId("redaction");
|
||||
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
|
||||
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
loadNerForTest();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void acceptanceTests() throws IOException {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
System.out.println("Finished structure analysis");
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
System.out.println("Finished analysis");
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
var publishedInformationEntry1 = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("published_information"))
|
||||
.filter(entry -> entry.getValue().equals("Oxford University Press"))
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
|
||||
var asyaLyon1 = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("CBI_author"))
|
||||
.filter(entry -> entry.getValue().equals("Asya Lyon"))
|
||||
.filter(entry -> entry.getSectionNumber() == publishedInformationEntry1.getSectionNumber())
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
|
||||
// assertFalse(asyaLyon1.isRedacted());
|
||||
|
||||
var idRemoval = IdRemoval.builder()
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.annotationId(publishedInformationEntry1.getId())
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build();
|
||||
|
||||
var manualRedactions = ManualRedactions.builder().idsToRemove(Set.of(idRemoval)).build();
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
var publishedInformationEntry2 = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("published_information"))
|
||||
.filter(entry -> entry.getValue().equals("Oxford University Press"))
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
|
||||
var asyaLyon2 = redactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals("CBI_author"))
|
||||
.filter(entry -> entry.getValue().equals("Asya Lyon"))
|
||||
.filter(entry -> entry.getSectionNumber() == publishedInformationEntry2.getSectionNumber())
|
||||
.findFirst()
|
||||
.orElseThrow();
|
||||
|
||||
assertTrue(asyaLyon2.isRedacted());
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/AcceptanceTest.pdf";
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -20,7 +20,7 @@ public class RedactionEntityTest {
|
||||
entity.skip("CBI.3.0", "");
|
||||
entity.skip("CBI.4.1", "");
|
||||
entity.skip("CBI.4.0", "");
|
||||
assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.4.1");
|
||||
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.4.1");
|
||||
assertThat(entity.getMatchedRuleUnit()).isEqualTo(4);
|
||||
}
|
||||
|
||||
@ -34,7 +34,7 @@ public class RedactionEntityTest {
|
||||
entity.skip("CBI.3.0", "");
|
||||
entity.skip("CBI.4.1", "");
|
||||
entity.skip("CBI.4.0", "");
|
||||
assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("MAN.2.0");
|
||||
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("MAN.2.0");
|
||||
assertThat(entity.getMatchedRuleUnit()).isEqualTo(2);
|
||||
}
|
||||
|
||||
@ -59,7 +59,7 @@ public class RedactionEntityTest {
|
||||
entity.apply("CBI.0.0", "", "");
|
||||
});
|
||||
entity.skip("CBI.2.0", "");
|
||||
assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.2.0");
|
||||
assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.2.0");
|
||||
assertThat(entity.getMatchedRuleUnit()).isEqualTo(2);
|
||||
}
|
||||
|
||||
|
||||
@ -44,6 +44,17 @@ public class DocumentEntityInsertionIntegrationTest extends BuildDocumentIntegra
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void assertSameEntitiesCantBeCreatedTwice() {
|
||||
|
||||
Document document = buildGraph("files/new/crafted document.pdf");
|
||||
String type = "CBI_author";
|
||||
assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isPresent());
|
||||
assertTrue(entityCreationService.byBoundary(new Boundary(0, 10), type, EntityType.ENTITY, document).isEmpty());
|
||||
assertEquals(1, document.getEntities().size());
|
||||
}
|
||||
|
||||
|
||||
private RedactionEntity createAndInsertEntity(Document document, String searchTerm) {
|
||||
|
||||
int start = document.getTextBlock().indexOf(searchTerm);
|
||||
|
||||
@ -138,7 +138,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
assertEquals(1, entity.getPages().size());
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals("Something", entity.getMatchedRule().legalBasis());
|
||||
assertEquals("Something", entity.getMatchedRule().getLegalBasis());
|
||||
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
|
||||
assertFalse(entity.isRemoved());
|
||||
assertTrue(entity.isSkipRemoveEntitiesContainedInLarger());
|
||||
|
||||
@ -0,0 +1,58 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.sectionidentifiers;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SectionIdentifier;
|
||||
|
||||
class SectionIdentifierTest {
|
||||
|
||||
@Test
|
||||
public void testParentOf() {
|
||||
|
||||
var headline = SectionIdentifier.fromSearchText("1 Did you ever hear the tragedy of Darth Plagueis The Wise?");
|
||||
var headline1 = SectionIdentifier.fromSearchText("1.0 I thought not. It’s not a story the Jedi would tell you.");
|
||||
var headline2 = SectionIdentifier.fromSearchText("1.1 It’s a Sith legend. Darth Plagueis was a Dark Lord of the Sith, ");
|
||||
var headline3 = SectionIdentifier.fromSearchText("1.2.3 so powerful and so wise he could use the Force to influence the midichlorians to create life…");
|
||||
var headline4 = SectionIdentifier.fromSearchText("1.2.3.4 He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.");
|
||||
var headline5 = SectionIdentifier.fromSearchText("1.2.3.4.5 The dark side of the Force is a pathway to many abilities some consider to be unnatural.");
|
||||
var headline6 = SectionIdentifier.fromSearchText("2.0 He became so powerful…");
|
||||
var headline7 = SectionIdentifier.fromSearchText("10000.0 the only thing he was afraid of was losing his power,");
|
||||
var headline8 = SectionIdentifier.fromSearchText("A.0 which eventually, of course, he did.");
|
||||
var headline9 = SectionIdentifier.fromSearchText("Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.");
|
||||
var headline10 = SectionIdentifier.fromSearchText("2.1.2 Ironic.");
|
||||
var headline11 = SectionIdentifier.fromSearchText("2.He could save others from death,");
|
||||
var headline12 = SectionIdentifier.fromSearchText(" 2. but not himself.");
|
||||
|
||||
var paragraph1 = SectionIdentifier.asChildOf(headline);
|
||||
assertTrue(paragraph1.isChildOf(headline));
|
||||
assertTrue(headline.isParentOf(paragraph1));
|
||||
assertFalse(paragraph1.isParentOf(headline));
|
||||
|
||||
assertFalse(headline.isParentOf(headline1));
|
||||
assertTrue(headline.isParentOf(headline2));
|
||||
assertTrue(headline.isParentOf(headline3));
|
||||
assertTrue(headline.isParentOf(headline4));
|
||||
assertTrue(headline.isParentOf(headline5));
|
||||
assertTrue(headline1.isParentOf(headline2));
|
||||
assertFalse(headline1.isParentOf(headline1));
|
||||
assertTrue(headline3.isParentOf(headline4));
|
||||
assertFalse(headline4.isParentOf(headline5));
|
||||
assertFalse(headline2.isParentOf(headline3));
|
||||
assertFalse(headline2.isParentOf(headline4));
|
||||
assertTrue(headline1.isParentOf(headline3));
|
||||
assertTrue(headline1.isParentOf(headline4));
|
||||
assertFalse(headline1.isParentOf(headline6));
|
||||
assertFalse(headline1.isParentOf(headline7));
|
||||
assertFalse(headline8.isParentOf(headline1));
|
||||
assertFalse(headline8.isParentOf(headline2));
|
||||
assertFalse(headline8.isParentOf(headline3));
|
||||
assertFalse(headline8.isParentOf(headline4));
|
||||
assertFalse(headline9.isParentOf(headline9));
|
||||
assertTrue(headline10.isChildOf(headline11));
|
||||
assertTrue(headline10.isChildOf(headline12));
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,93 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
|
||||
class AtomicTextBlockTest {
|
||||
|
||||
@Test
|
||||
void subSequenceWithLineBreaks1() {
|
||||
|
||||
String searchText = "1234 6789 ";
|
||||
var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText.length())).build();
|
||||
String searchTextWithLineBreaks = atb.searchTextWithLineBreaks();
|
||||
assertEquals("1234\n6789\n", searchTextWithLineBreaks);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void subSequenceWithLineBreaks2() {
|
||||
|
||||
String searchText = "1234 6789 ";
|
||||
var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7, 8, 9)).boundary(new Boundary(0, searchText.length())).build();
|
||||
String searchTextWithLineBreaks = atb.searchTextWithLineBreaks();
|
||||
assertEquals("1234\n6789\n", searchTextWithLineBreaks);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void subSequenceWithLineBreaks3() {
|
||||
|
||||
String searchText = "1234 6789 1234 ";
|
||||
var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText.length())).build();
|
||||
String searchTextWithLineBreaks = atb.searchTextWithLineBreaks();
|
||||
assertEquals("1234\n6789\n1234\n", searchTextWithLineBreaks);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void subSequenceWithLineBreaks4() {
|
||||
|
||||
String searchText = "1234 6789 1234 ";
|
||||
var atb = AtomicTextBlock.builder().searchText(searchText).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText.length())).build();
|
||||
var textBlock = new ConcatenatedTextBlock(List.of(atb));
|
||||
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
|
||||
assertEquals("1234\n6789\n1234\n", searchTextWithLineBreaks);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void subSequenceWithLineBreaks5() {
|
||||
|
||||
String searchText1 = "1234 6789 ";
|
||||
String searchText2 = "1234 ";
|
||||
var atb1 = AtomicTextBlock.builder().searchText(searchText1).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText1.length())).build();
|
||||
var atb2 = AtomicTextBlock.builder()
|
||||
.searchText(searchText2)
|
||||
.lineBreaks(List.of())
|
||||
.boundary(new Boundary(searchText1.length(), searchText1.length() + searchText2.length()))
|
||||
.build();
|
||||
var textBlock = new ConcatenatedTextBlock(List.of(atb1, atb2));
|
||||
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
|
||||
assertEquals("1234\n6789\n1234\n", searchTextWithLineBreaks);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void subSequenceWithLineBreaks6() {
|
||||
|
||||
String searchText1 = "1234 6789 ";
|
||||
String searchText2 = "1234 ";
|
||||
String searchText3 = "1234 8475678900 ";
|
||||
var atb1 = AtomicTextBlock.builder().searchText(searchText1).lineBreaks(List.of(5, 7)).boundary(new Boundary(0, searchText1.length())).build();
|
||||
var atb2 = AtomicTextBlock.builder()
|
||||
.searchText(searchText2)
|
||||
.lineBreaks(List.of())
|
||||
.boundary(new Boundary(searchText1.length(), searchText1.length() + searchText2.length()))
|
||||
.build();
|
||||
var atb3 = AtomicTextBlock.builder()
|
||||
.searchText(searchText3)
|
||||
.lineBreaks(List.of(atb2.getBoundary().end() + 6))
|
||||
.boundary(new Boundary(atb2.getBoundary().end(), atb2.getBoundary().end() + searchText3.length()))
|
||||
.build();
|
||||
var textBlock = new ConcatenatedTextBlock(List.of(atb1, atb2, atb3));
|
||||
String searchTextWithLineBreaks = textBlock.searchTextWithLineBreaks();
|
||||
assertEquals("1234\n6789\n1234\n1234 8475678900\n", searchTextWithLineBreaks);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,90 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class RectangleTransformationsTest {
|
||||
|
||||
@Test
|
||||
public void testRectangle2DBBox() {
|
||||
|
||||
var r1 = new Rectangle2D.Double(0, 0, 1, 1);
|
||||
var r2 = new Rectangle2D.Double(1, 1, 1, 1);
|
||||
var result = RectangleTransformations.rectangle2DBBox(List.of(r1, r2));
|
||||
assertEquals(0, result.getX());
|
||||
assertEquals(0, result.getY());
|
||||
assertEquals(2, result.getWidth());
|
||||
assertEquals(2, result.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRectangle2DBBox2() {
|
||||
|
||||
var r1 = new Rectangle2D.Double(0, 0, -1, -1);
|
||||
var r2 = new Rectangle2D.Double(1, 1, 1, 1);
|
||||
var result = RectangleTransformations.rectangle2DBBox(List.of(r1, r2));
|
||||
assertEquals(-1, result.getX());
|
||||
assertEquals(-1, result.getY());
|
||||
assertEquals(3, result.getWidth());
|
||||
assertEquals(3, result.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRectangle2DBBox3() {
|
||||
|
||||
var r1 = new Rectangle2D.Double(0, 0, -1, -1);
|
||||
var r2 = new Rectangle2D.Double(1, 1, 1, 1);
|
||||
var result = RectangleTransformations.rectangle2DBBox(List.of(r2, r1));
|
||||
assertEquals(-1, result.getX());
|
||||
assertEquals(-1, result.getY());
|
||||
assertEquals(3, result.getWidth());
|
||||
assertEquals(3, result.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRectangle2DBBox4() {
|
||||
|
||||
var r1 = new Rectangle2D.Double(2, 0, -1, -1);
|
||||
var r2 = new Rectangle2D.Double(0, 2, 1, -1);
|
||||
var result = RectangleTransformations.rectangle2DBBox(List.of(r2, r1));
|
||||
assertEquals(0, result.getX());
|
||||
assertEquals(-1, result.getY());
|
||||
assertEquals(2, result.getWidth());
|
||||
assertEquals(3, result.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRectangle2DBBox5() {
|
||||
|
||||
var r1 = new Rectangle2D.Double(2, 0, -1, -1);
|
||||
var r2 = new Rectangle2D.Double(0, 2, 1, -1);
|
||||
var r3 = new Rectangle2D.Double(3, 2, 1, 1);
|
||||
var result = RectangleTransformations.rectangle2DBBox(List.of(r2, r1, r3));
|
||||
assertEquals(0, result.getX());
|
||||
assertEquals(-1, result.getY());
|
||||
assertEquals(4, result.getWidth());
|
||||
assertEquals(4, result.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRectangle2DBBox6() {
|
||||
|
||||
var r1 = new Rectangle2D.Double(0, 0, -1, -1);
|
||||
var r2 = new Rectangle2D.Double(-1, -1, -1, -1);
|
||||
var result = RectangleTransformations.rectangle2DBBox(List.of(r1, r2));
|
||||
assertEquals(-2, result.getX());
|
||||
assertEquals(-2, result.getY());
|
||||
assertEquals(2, result.getWidth());
|
||||
assertEquals(2, result.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
@ -8,6 +8,7 @@ import java.awt.geom.Rectangle2D;
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@ -66,6 +67,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
.filter(e -> !e.type().equals("CBI_author"));
|
||||
List<RedactionEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
|
||||
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
redactionEntities.stream()
|
||||
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))
|
||||
@ -98,6 +101,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
log.info("Combined to CBI_address");
|
||||
List<RedactionEntity> cbiAddressEntities = nerEntityBoundaries.stream()
|
||||
.map(b -> entityCreationService.byBoundary(b, "CBI_address", EntityType.RECOMMENDATION, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
assertFalse(cbiAddressEntities.isEmpty());
|
||||
assertTrue(cbiAddressEntities.stream().allMatch(entity -> entity.getBoundary().start() < entity.getBoundary().end()));
|
||||
@ -108,6 +113,8 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
.getNerEntityList()
|
||||
.stream()
|
||||
.map(e -> entityCreationService.byBoundary(e.boundary(), e.type(), EntityType.ENTITY, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.toList();
|
||||
Stream.concat(cbiAddressEntities.stream(), validatedEntities.stream())
|
||||
.collect(Collectors.groupingBy(e -> e.getPages().stream().findFirst().get().getNumber()))
|
||||
|
||||
@ -143,6 +143,7 @@ Allen T.
|
||||
Allen T.R.
|
||||
Almeida A
|
||||
Almeida A.
|
||||
Asya Lyon
|
||||
Almeida A.A.
|
||||
Almeida A.A.|Vassilieff I.
|
||||
Almeida|A.A.|Vassilieff|I.
|
||||
|
||||
@ -87,3 +87,4 @@ Toxicol Sci.
|
||||
Toxicol Sci. 1
|
||||
Test Ignored Hint Published Information
|
||||
Workshop
|
||||
Oxford University Press
|
||||
|
||||
@ -0,0 +1,772 @@
|
||||
package drools
|
||||
|
||||
import static java.lang.String.format;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.Collection;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global NerEntitiesAdapter nerEntitiesAdapter
|
||||
global Dictionary dictionary
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
|
||||
// Query matching every FileAttribute fact; each match is bound to $fileAttribute.
query "getFileAttributes"
    $fileAttribute: FileAttribute()
end
|
||||
|
||||
//------------------------------------ Syngenta specific rules ------------------------------------
|
||||
|
||||
// Rule unit: SYN.1
|
||||
// Fires for sections containing "CT" or "BL". Every case-insensitive regex match becomes a
// CBI_address RECOMMENDATION that is skipped under rule id SYN.1.0, tagged with Engine.RULE
// and inserted into working memory.
rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL"
    when
        $section: Section(containsString("CT") || containsString("BL"))
    then
        /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
        entityCreationService.byRegexIgnoreCase("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", "CBI_address", EntityType.RECOMMENDATION, $section)
            .forEach(laboratory -> {
                laboratory.skip("SYN.1.0", "");
                laboratory.addEngine(Engine.RULE);
                insert(laboratory);
            });
end
|
||||
|
||||
|
||||
//------------------------------------ CBI rules ------------------------------------
|
||||
|
||||
// Rule unit: CBI.0
|
||||
// No "Vertebrate Study" = yes attribute present: apply Article 39(e)(3) to every
// CBI_author entity flagged as dictionaryEntry.
rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
    then
        $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
// "Vertebrate Study" = yes attribute present: apply Article 39(e)(2) to every
// CBI_author entity flagged as dictionaryEntry.
rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $entity: RedactionEntity(type == "CBI_author", dictionaryEntry)
    then
        $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule unit: CBI.1
|
||||
// No "Vertebrate Study" = yes attribute present: dictionary CBI_address entities are
// skipped (not applied) under rule id CBI.1.0.
rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
    then
        $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study");
end
|
||||
|
||||
// "Vertebrate Study" = yes attribute present: apply Article 39(e)(2) to every
// CBI_address entity flagged as dictionaryEntry.
rule "CBI.1.1: Redact CBI Address (Vertebrate Study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $entity: RedactionEntity(type == "CBI_address", dictionaryEntry)
    then
        $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule unit: CBI.2
|
||||
// For an applied CBI_author whose following text matches an apostrophe variant plus "s",
// create a FALSE_POSITIVE entity over the same boundary (if one can be created), skip it
// under rule id CBI.2.0 and insert it.
rule "CBI.2.0: Don't redact genitive CBI_author"
    when
        $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied())
    then
        entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
            .ifPresent(genitiveAuthor -> {
                genitiveAuthor.skip("CBI.2.0", "Genitive Author found");
                insert(genitiveAuthor);
            });
end
|
||||
|
||||
|
||||
// Rule unit: CBI.7
|
||||
// Table-free section holding published_information plus CBI_author or CBI_address entities:
// each author/address entity is skipped with the section's published_information entities
// passed as references.
rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables"
    when
        $section: Section(!hasTables(),
                          hasEntitiesOfType("published_information"),
                          (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
    then
        $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
            .forEach(nameOrAddress -> nameOrAddress.skipWithReferences("CBI.7.0", "Published Information found in section", $section.getEntitiesOfType("published_information")));
end
|
||||
|
||||
// Table holding published_information plus CBI_author or CBI_address entities: each entity
// streamed by streamEntitiesWhereRowContainsEntitiesOfType(...) is skipped, referencing the
// published_information entities found in the same row.
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
    when
        $table: Table(hasEntitiesOfType("published_information"),
                      (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
    then
        $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address"))
            .forEach(rowEntity -> rowEntity.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", rowEntity)));
end
|
||||
|
||||
|
||||
// Rule unit: CBI.9
|
||||
// Non-vertebrate case: every cell under the header "Author(s)" that yields an entity becomes
// a CBI_author ENTITY, applied with Article 39(e)(3), tagged Engine.RULE and inserted.
rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author(s)"))
    then
        $table.streamTableCellsWithHeader("Author(s)")
            .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
            // drop cells for which no entity was created
            .flatMap(Optional::stream)
            .forEach(author -> {
                author.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                insert(author);
            });
end
|
||||
|
||||
// Non-vertebrate case: every cell under the header "Author" that yields an entity becomes
// a CBI_author ENTITY, applied with Article 39(e)(3), tagged Engine.RULE and inserted.
rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author"))
    then
        $table.streamTableCellsWithHeader("Author")
            .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
            // drop cells for which no entity was created
            .flatMap(Optional::stream)
            .forEach(author -> {
                author.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                insert(author);
            });
end
|
||||
|
||||
|
||||
// Rule unit: CBI.10
|
||||
// Vertebrate case: every cell under the header "Author(s)" that yields an entity becomes
// a CBI_author ENTITY, applied with Article 39(e)(2), tagged Engine.RULE and inserted.
rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author(s)"))
    then
        $table.streamTableCellsWithHeader("Author(s)")
            .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
            // drop cells for which no entity was created
            .flatMap(Optional::stream)
            .forEach(author -> {
                author.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                insert(author);
            });
end
|
||||
|
||||
// Vertebrate case: every cell under the header "Author" that yields an entity becomes
// a CBI_author ENTITY, applied with Article 39(e)(2), tagged Engine.RULE and inserted.
rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $table: Table(hasHeader("Author"))
    then
        $table.streamTableCellsWithHeader("Author")
            .map(authorCell -> entityCreationService.bySemanticNode(authorCell, "CBI_author", EntityType.ENTITY))
            // drop cells for which no entity was created
            .flatMap(Optional::stream)
            .forEach(author -> {
                author.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                insert(author);
            });
end
|
||||
|
||||
|
||||
// Rule unit: CBI.11
|
||||
// For tables that have both an "Author(s)" and a "Vertebrate Study Y/N" header, hand every
// CBI_author entity of the table to dictionary.addMultipleAuthorsAsRecommendation(...).
// Salience -1 within agenda-group LOCAL_DICTIONARY_ADDS.
rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    salience -1
    when
        $table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N"))
    then
        $table.getEntitiesOfType("CBI_author")
            .forEach(author -> dictionary.addMultipleAuthorsAsRecommendation(author));
end
|
||||
|
||||
|
||||
// Rule unit: CBI.16
|
||||
// Non-vertebrate case, sections containing "et al.": regex group 1 (the name in front of
// "et al") becomes a CBI_author ENTITY applied with Article 39(e)(3); its value is also
// added to the local CBI_author dictionary before the entity is inserted.
rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(author -> {
                author.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                dictionary.addLocalDictionaryEntry("CBI_author", author.getValue(), false);
                insert(author);
            });
end
|
||||
|
||||
// Vertebrate case, sections containing "et al.": regex group 1 (the name in front of
// "et al") becomes a CBI_author ENTITY applied with Article 39(e)(2); the entity is inserted
// and its value is then added to the local CBI_author dictionary.
rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(author -> {
                author.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                author.addEngine(Engine.RULE);
                insert(author);
                dictionary.addLocalDictionaryEntry("CBI_author", author.getValue(), false);
            });
end
|
||||
|
||||
|
||||
// Rule unit: CBI.17
|
||||
// Table-free sections containing "Species" and "Source" but neither "Species:" nor "Source:":
// entities created by lineAfterString("Source", ...) become CBI_address recommendations,
// skipped under rule id CBI.17.0 and inserted.
rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
    when
        $section: Section(!hasTables(), containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
    then
        entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
            .forEach(sourceLine -> {
                sourceLine.addEngine(Engine.RULE);
                sourceLine.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section");
                insert(sourceLine);
            });
end
|
||||
|
||||
// Table-free sections containing both "Species:" and "Source:": entities created by
// lineAfterString("Source:", ...) become CBI_address recommendations, skipped under
// rule id CBI.17.1 and inserted.
rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with colon"
    when
        $section: Section(!hasTables(), containsString("Species:"), containsString("Source:"))
    then
        entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
            .forEach(sourceLine -> {
                sourceLine.addEngine(Engine.RULE);
                sourceLine.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section");
                insert(sourceLine);
            });
end
|
||||
|
||||
|
||||
// Rule unit: CBI.20
|
||||
// CBI.20.0 / CBI.20.1 form a complementary pair keyed on the "Vertebrate Study" file attribute.
// The attribute value is compared case-insensitively (value.toLowerCase() == "yes"), matching
// the other CBI and PII rules in this file, so a value such as "yes" selects the same branch
// everywhere and exactly one rule of the pair fires.

// Non-vertebrate case: text between "PERFORMING LABORATORY:" and "LABORATORY PROJECT ID:"
// becomes a CBI_address ENTITY that is skipped, added to the local dictionary and inserted.
rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:"))
    then
        entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
            .forEach(laboratoryEntity -> {
                laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study");
                laboratoryEntity.addEngine(Engine.RULE);
                dictionary.addLocalDictionaryEntry(laboratoryEntity);
                insert(laboratoryEntity);
            });
end


// Vertebrate case: same span, but the entity is applied with Article 39(e)(2).
rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:"))
    then
        entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
            .forEach(laboratoryEntity -> {
                laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                laboratoryEntity.addEngine(Engine.RULE);
                dictionary.addLocalDictionaryEntry(laboratoryEntity);
                insert(laboratoryEntity);
            });
end
|
||||
|
||||
|
||||
//------------------------------------ PII rules ------------------------------------
|
||||
|
||||
// Rule unit: PII.0
|
||||
// No "Vertebrate Study" = yes attribute present: apply Article 39(e)(3) to every
// PII entity flagged as dictionaryEntry.
rule "PII.0.0: Redact all PII (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $pii: RedactionEntity(type == "PII", dictionaryEntry)
    then
        $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
// "Vertebrate Study" = yes attribute present: apply Article 39(e)(2) to every
// PII entity flagged as dictionaryEntry.
rule "PII.0.1: Redact all PII (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $pii: RedactionEntity(type == "PII", dictionaryEntry)
    then
        $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule unit: PII.1
|
||||
// Non-vertebrate case, sections containing "@": regex group 1 (the e-mail address) becomes
// a PII ENTITY tagged Engine.RULE, applied with Article 39(e)(3) and inserted.
rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("@"))
    then
        entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
            .forEach(email -> {
                email.addEngine(Engine.RULE);
                email.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                insert(email);
            });
end
|
||||
|
||||
// Vertebrate case, sections containing "@": regex group 1 (the e-mail address) becomes
// a PII ENTITY tagged Engine.RULE, applied and inserted.
// The legal basis is Article 39(e)(2), in line with every other vertebrate-study rule in
// this file; 39(e)(3) is the basis the non-vertebrate variants use.
rule "PII.1.1: Redact Emails by RegEx (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("@"))
    then
        entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
            .forEach(emailEntity -> {
                emailEntity.addEngine(Engine.RULE);
                emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                insert(emailEntity);
            });
end
|
||||
|
||||
|
||||
// Rule unit: PII.2
|
||||
// Non-vertebrate case: sections containing one of the contact keywords (including the
// variants "Ter", "Fel", "Fer") are scanned case-insensitively; regex group 2 (the number
// following the keyword) becomes a PII ENTITY applied with Article 39(e)(3) and inserted.
rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("Contact") ||
                          containsString("Telephone") ||
                          containsString("Phone") ||
                          containsString("Ph.") ||
                          containsString("Fax") ||
                          containsString("Tel") ||
                          containsString("Ter") ||
                          containsString("Mobile") ||
                          containsString("Fel") ||
                          containsString("Fer"))
    then
        entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section)
            .forEach(phoneOrFax -> {
                phoneOrFax.addEngine(Engine.RULE);
                phoneOrFax.apply("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                insert(phoneOrFax);
            });
end
|
||||
|
||||
// Vertebrate case: sections containing one of the contact keywords (including the variants
// "Ter", "Fel", "Fer") are scanned case-insensitively; regex group 2 (the number following
// the keyword) becomes a PII ENTITY applied with Article 39(e)(2) and inserted.
rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(containsString("Contact") ||
                          containsString("Telephone") ||
                          containsString("Phone") ||
                          containsString("Ph.") ||
                          containsString("Fax") ||
                          containsString("Tel") ||
                          containsString("Ter") ||
                          containsString("Mobile") ||
                          containsString("Fel") ||
                          containsString("Fer"))
    then
        entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section)
            .forEach(phoneOrFax -> {
                phoneOrFax.addEngine(Engine.RULE);
                phoneOrFax.apply("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                insert(phoneOrFax);
            });
end
|
||||
|
||||
|
||||
// Rule unit: PII.9
|
||||
// Non-vertebrate case, table-free sections with "AUTHOR(S):" and "COMPLETION DATE:" but
// without "STUDY COMPLETION DATE:": the text between the two markers becomes a PII ENTITY
// applied with Article 39(e)(3) and inserted.
rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:"))
    then
        entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
            .forEach(authors -> {
                authors.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                authors.addEngine(Engine.RULE);
                insert(authors);
            });
end
|
||||
|
||||
// Vertebrate case, table-free sections with "AUTHOR(S):" and "COMPLETION DATE:" but without
// "STUDY COMPLETION DATE:": the text between the two markers becomes a PII ENTITY applied
// with Article 39(e)(2) and inserted.
// The rule name now states what the conditions check (vertebrate study, "COMPLETION DATE").
rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:"))
    then
        entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
            .forEach(authorEntity -> {
                authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                authorEntity.addEngine(Engine.RULE);
                insert(authorEntity);
            });
end
|
||||
|
||||
// Non-vertebrate case, table-free sections with "AUTHOR(S):" and "STUDY COMPLETION DATE:":
// the text between the two markers becomes a PII ENTITY applied with Article 39(e)(3)
// and inserted.
// The rule name now names the end marker the rule actually uses ("STUDY COMPLETION DATE").
rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:"))
    then
        entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
            .forEach(authorEntity -> {
                authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                authorEntity.addEngine(Engine.RULE);
                insert(authorEntity);
            });
end
|
||||
|
||||
// Vertebrate case, table-free sections with "AUTHOR(S):" and "STUDY COMPLETION DATE:":
// the text between the two markers becomes a PII ENTITY applied with Article 39(e)(2)
// and inserted.
rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:"))
    then
        entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
            .forEach(authors -> {
                authors.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                authors.addEngine(Engine.RULE);
                insert(authors);
            });
end
|
||||
|
||||
|
||||
//------------------------------------ Other rules ------------------------------------
|
||||
|
||||
// Rule unit: ETC.0
|
||||
// Sections containing "purity" (any case): regex group 1 of each case-insensitive match
// becomes a hint_only entity, tagged Engine.RULE and skipped under rule id ETC.0.0.
// Unlike most rules in this file, the entity is not inserted here.
rule "ETC.0.0: Purity Hint"
    when
        $section: Section(containsStringIgnoreCase("purity"))
    then
        entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.ENTITY, 1, $section)
            .forEach(purityHint -> {
                purityHint.addEngine(Engine.RULE);
                purityHint.skip("ETC.0.0", "");
            });
end
|
||||
|
||||
|
||||
// Rule unit: ETC.2
|
||||
// ETC.2.0 / ETC.2.1 form a complementary pair keyed on the "Vertebrate Study" file attribute.
// The attribute value is compared case-insensitively, matching the CBI and PII rules in this
// file, so exactly one rule of the pair fires for values such as "yes" or "Yes".

// Non-vertebrate case: signature images are applied with Article 39(e)(3).
rule "ETC.2.0: Redact signatures (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $signature: Image(imageType == ImageType.SIGNATURE)
    then
        $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


// Vertebrate case: signature images are applied with Article 39(e)(2).
// Carries its own identifier ETC.2.1 (name and apply call) instead of reusing ETC.2.0, so the
// rule id recorded by apply(...) identifies which variant fired.
rule "ETC.2.1: Redact signatures (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $signature: Image(imageType == ImageType.SIGNATURE)
    then
        $signature.apply("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule unit: ETC.3
|
||||
// ETC.3.0 / ETC.3.1 form a complementary pair keyed on the "Vertebrate Study" file attribute.
// The rule names now match their conditions (they were swapped), and the attribute value is
// compared case-insensitively like in the CBI and PII rules in this file.

// Non-vertebrate case (no "Vertebrate Study" = yes attribute): logos are applied with
// Article 39(e)(3).
rule "ETC.3.0: Redact logos (non vertebrate study)"
    when
        not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $logo: Image(imageType == ImageType.LOGO)
    then
        $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


// Vertebrate case: logos are applied with Article 39(e)(2).
rule "ETC.3.1: Redact logos (vertebrate study)"
    when
        FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
        $logo: Image(imageType == ImageType.LOGO)
    then
        $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule unit: ETC.5
|
||||
// Without a FileAttribute "Confidentiality" = "confidential", every dossier_redaction entity
// is marked ignored; the entity and each of its intersecting nodes are then updated in
// working memory.
rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
    when
        not FileAttribute(label == "Confidentiality", value == "confidential")
        $dossierRedaction: RedactionEntity(type == "dossier_redaction")
    then
        $dossierRedaction.setIgnored(true);
        update($dossierRedaction);
        $dossierRedaction.getIntersectingNodes()
            .forEach(intersectingNode -> update(intersectingNode));
end
|
||||
|
||||
|
||||
//------------------------------------ AI rules ------------------------------------
|
||||
|
||||
// Rule unit: AI.0
|
||||
// Salience 999. Each NER entity of type CBI_author is converted via byNerEntity(...) into a
// RECOMMENDATION on the document; conversions that yield no entity are dropped, the rest
// are inserted.
rule "AI.0.0: add all NER Entities of type CBI_author"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("CBI_author"))
    then
        nerEntities.streamEntitiesOfType("CBI_author")
            .map(nerAuthor -> entityCreationService.byNerEntity(nerAuthor, EntityType.RECOMMENDATION, document))
            .flatMap(Optional::stream)
            .forEach(recommendation -> insert(recommendation));
end
|
||||
|
||||
|
||||
// Rule unit: AI.1
|
||||
// Salience 999. When ORG, STREET or CITY NER entities exist, the boundaries produced by
// combineNerEntitiesToCbiAddressDefaults(...) are turned into CBI_address recommendations;
// each created entity is tagged Engine.NER and inserted.
rule "AI.1.0: combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY"))
    then
        nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
            .map(addressBoundary -> entityCreationService.byBoundary(addressBoundary, "CBI_address", EntityType.RECOMMENDATION, document))
            // drop boundaries for which no entity was created
            .flatMap(Optional::stream)
            .forEach(address -> {
                address.addEngine(Engine.NER);
                insert(address);
            });
end
|
||||
|
||||
|
||||
//------------------------------------ Manual redaction rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
// Salience 128. Pairs a ManualResizeRedaction with the entity matching its annotation id,
// delegates the resize to manualRedactionApplicationService, retracts the consumed request
// and updates the entity together with its intersecting nodes.
rule "MAN.0.0: Apply manual resize redaction"
    salience 128
    when
        $resizeRedaction: ManualResizeRedaction($id: annotationId)
        $entityToBeResized: RedactionEntity(matchesAnnotationId($id))
    then
        manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
        retract($resizeRedaction);
        update($entityToBeResized);
        $entityToBeResized.getIntersectingNodes()
            .forEach(intersectingNode -> update(intersectingNode));
end
|
||||
|
||||
|
||||
// Rule unit: MAN.1
|
||||
// Salience 128. An approved IdRemoval (not a dictionary removal, with a request date) that
// has no approved ManualForceRedaction for the same annotation id marks the matching entity
// as ignored; the removal fact is retracted and the intersecting nodes are updated.
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
    salience 128
    when
        $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
        not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
        $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
    then
        $entityToBeRemoved.setIgnored(true);
        update($entityToBeRemoved);
        retract($idRemoval);
        $entityToBeRemoved.getIntersectingNodes()
            .forEach(intersectingNode -> update(intersectingNode));
end
|
||||
|
||||
// Salience 128. Image counterpart of MAN.1.0: an approved IdRemoval without an approved
// force redaction marks the Image with the same id as ignored, retracts the removal fact
// and updates the image's parent node.
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
    salience 128
    when
        $idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
        not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
        $imageEntityToBeRemoved: Image($id == id)
    then
        $imageEntityToBeRemoved.setIgnored(true);
        update($imageEntityToBeRemoved);
        retract($idRemoval);
        update($imageEntityToBeRemoved.getParent());
end
|
||||
|
||||
|
||||
// Rule unit: MAN.2
|
||||
// Salience 128. An approved ManualForceRedaction with a request date applies its legal basis
// to the entity matching its annotation id, clears the removed/ignored flags, sets
// skipRemoveEntitiesContainedInLarger, updates the entity and its intersecting nodes and
// finally retracts the force-redaction fact.
rule "MAN.2.0: Apply force redaction"
    salience 128
    when
        $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
        $entityToForce: RedactionEntity(matchesAnnotationId($id))
    then
        $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
        $entityToForce.setRemoved(false);
        $entityToForce.setIgnored(false);
        $entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
        update($entityToForce);
        $entityToForce.getIntersectingNodes()
            .forEach(intersectingNode -> update(intersectingNode));
        retract($force);
end
|
||||
|
||||
|
||||
// Rule unit: MAN.3
|
||||
// Salience 128. An approved ManualImageRecategorization sets the image type (parsed with
// ImageType.fromString) on the Image with the same id, updates the image, retracts the
// request and updates the image's parent node.
rule "MAN.3.0: Apply image recategorization"
    salience 128
    when
        $recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
        $imageToBeRecategorized: Image($id == id)
    then
        $imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
        update($imageToBeRecategorized);
        retract($recategorization);
        update($imageToBeRecategorized.getParent());
end
|
||||
|
||||
|
||||
//------------------------------------ Entity merging rules ------------------------------------
|
||||
|
||||
// Rule unit: X.0
|
||||
// Matches a pair of distinct RedactionEntity facts with equal type and entityType
// where one is contained by the other. The contained one must be neither resized
// nor flagged with skipRemoveEntitiesContainedInLarger.
rule "X.0.0: remove Entity contained by Entity of same type"
    salience 65
    when
        $outer: RedactionEntity($sharedType: type, $sharedEntityType: entityType)
        $inner: RedactionEntity(this != $outer, containedBy($outer), type == $sharedType, entityType == $sharedEntityType, !resized, !skipRemoveEntitiesContainedInLarger)
    then
        // Call remove() on the contained entity and take it out of working memory.
        $inner.remove();
        retract($inner);
end
|
||||
|
||||
|
||||
// Rule unit: X.1
|
||||
// Matches two distinct, intersecting RedactionEntity facts with equal type and
// entityType, neither of which is resized or flagged with
// skipRemoveEntitiesContainedInLarger, and replaces them with one entity built
// from both.
rule "X.1.0: merge intersecting Entities of same type"
    salience 64
    when
        $left: RedactionEntity($sharedType: type, $sharedEntityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
        $right: RedactionEntity(this != $left, intersects($left), type == $sharedType, entityType == $sharedEntityType, !resized, !skipRemoveEntitiesContainedInLarger)
    then
        // Call remove() on both source entities before the combined entity is created.
        $left.remove();
        $right.remove();
        RedactionEntity combined = entityCreationService.byEntities(List.of($left, $right), $sharedType, $sharedEntityType, document);
        // Swap the facts in working memory: both sources out, combined entity in.
        retract($left);
        retract($right);
        insert(combined);
        // Notify the engine about every node the combined entity intersects.
        combined.getIntersectingNodes().forEach(intersected -> update(intersected));
end
|
||||
|
||||
|
||||
// Rule unit: X.2
|
||||
// Matches a RedactionEntity of entityType ENTITY that is contained by a
// FALSE_POSITIVE RedactionEntity of the same type. The contained entity must be
// neither resized nor flagged with skipRemoveEntitiesContainedInLarger.
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
    salience 64
    when
        $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
        $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
    then
        // The intersecting nodes are read and reported to the engine before remove() is called.
        // NOTE(review): presumably remove() detaches the entity from its nodes - confirm before reordering.
        $entity.getIntersectingNodes().forEach(node -> update(node));
        $entity.remove();
        // Terminated with a semicolon like every other consequence statement in this file.
        retract($entity);
end
|
||||
|
||||
|
||||
// Rule unit: X.3
|
||||
// Matches a RedactionEntity of entityType RECOMMENDATION that is contained by a
// FALSE_RECOMMENDATION RedactionEntity of the same type. The recommendation must
// be neither resized nor flagged with skipRemoveEntitiesContainedInLarger.
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
    salience 64
    when
        $blocker: RedactionEntity($sharedType: type, entityType == EntityType.FALSE_RECOMMENDATION)
        $candidate: RedactionEntity(entityType == EntityType.RECOMMENDATION, type == $sharedType, containedBy($blocker), !resized, !skipRemoveEntitiesContainedInLarger)
    then
        // Call remove() on the recommendation and take it out of working memory.
        $candidate.remove();
        retract($candidate);
end
|
||||
|
||||
|
||||
// Rule unit: X.4
|
||||
// Matches a RedactionEntity of entityType RECOMMENDATION that intersects an
// ENTITY RedactionEntity of the same type. The recommendation must be neither
// resized nor flagged with skipRemoveEntitiesContainedInLarger.
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
    salience 256
    when
        $winner: RedactionEntity($sharedType: type, entityType == EntityType.ENTITY)
        $candidate: RedactionEntity(entityType == EntityType.RECOMMENDATION, type == $sharedType, intersects($winner), !resized, !skipRemoveEntitiesContainedInLarger)
    then
        // Copy the recommendation's engines onto the surviving entity before dropping it.
        $winner.addEngines($candidate.getEngines());
        $candidate.remove();
        retract($candidate);
end
|
||||
|
||||
|
||||
// Rule unit: X.5
|
||||
// Matches a RedactionEntity of entityType RECOMMENDATION that is contained by any
// RedactionEntity of entityType ENTITY (the type is not compared). The
// recommendation must be neither resized nor flagged with
// skipRemoveEntitiesContainedInLarger.
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
    salience 256
    when
        $container: RedactionEntity(entityType == EntityType.ENTITY)
        $candidate: RedactionEntity(entityType == EntityType.RECOMMENDATION, containedBy($container), !resized, !skipRemoveEntitiesContainedInLarger)
    then
        // Call remove() on the recommendation and take it out of working memory.
        $candidate.remove();
        retract($candidate);
end
|
||||
|
||||
|
||||
// Rule unit: X.6
|
||||
// Matches a RedactionEntity that intersects an ENTITY RedactionEntity of a
// different type and whose dictionary rank is lower than that entity's rank. The
// lower-ranked entity must be neither resized nor flagged with
// skipRemoveEntitiesContainedInLarger.
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
    salience 32
    when
        $dominant: RedactionEntity($dominantType: type, entityType == EntityType.ENTITY)
        $loser: RedactionEntity(intersects($dominant), type != $dominantType, dictionary.getDictionaryRank($dominantType) > dictionary.getDictionaryRank(type), !resized, !skipRemoveEntitiesContainedInLarger)
    then
        // The intersecting nodes are read and reported to the engine before remove() is called.
        $loser.getIntersectingNodes().forEach(intersected -> update(intersected));
        $loser.remove();
        retract($loser);
end
|
||||
|
||||
|
||||
//------------------------------------ File attributes rules ------------------------------------
|
||||
|
||||
// Rule unit: FA.1
|
||||
// Matches two distinct FileAttribute facts with equal label and equal value and
// takes the second one out of working memory.
rule "FA.1.0: remove duplicate FileAttributes"
    salience 64
    when
        $kept: FileAttribute($attributeLabel: label, $attributeValue: value)
        $redundant: FileAttribute(label == $attributeLabel, value == $attributeValue, this != $kept)
    then
        retract($redundant);
end
|
||||
|
||||
|
||||
//------------------------------------ Local dictionary search rules ------------------------------------
|
||||
|
||||
// Rule unit: LDS.0
|
||||
// For every DictionaryModel returned by dictionary.getDictionaryModels() whose
// localEntries are not empty, runs that model's local search over the document
// and inserts each resulting entity as a RECOMMENDATION with the RULE engine added.
rule "LDS.0.0: run local dictionary search"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    salience -999
    when
        DictionaryModel($dictionaryType: type, $localSearch: localSearch, !localEntries.isEmpty()) from dictionary.getDictionaryModels()
    then
        entityCreationService.bySearchImplementation($localSearch, $dictionaryType, EntityType.RECOMMENDATION, document)
            .forEach(found -> {
                found.addEngine(Engine.RULE);
                insert(found);
            });
end
|
||||
@ -132,9 +132,11 @@ rule "CBI.2.0: Don't redact genitive CBI_author"
|
||||
when
|
||||
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), isApplied())
|
||||
then
|
||||
RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document);
|
||||
falsePositive.skip("CBI.2.0", "Genitive Author found");
|
||||
insert(falsePositive);
|
||||
entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document)
|
||||
.ifPresent(falsePositive -> {
|
||||
falsePositive.skip("CBI.2.0", "Genitive Author found");
|
||||
insert(falsePositive);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -246,7 +248,6 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
|
||||
then
|
||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.applyWithReferences(
|
||||
"CBI.5.0",
|
||||
"no_redaction_indicator but also redaction_indicator found",
|
||||
@ -264,10 +265,9 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
|
||||
hasEntitiesOfType("redaction_indicator"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator"))
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator"))
|
||||
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.applyWithReferences(
|
||||
"CBI.5.1",
|
||||
"no_redaction_indicator but also redaction_indicator found",
|
||||
@ -290,7 +290,6 @@ rule "CBI.6.0: Don't redact Names and Addresses if vertebrate but also published
|
||||
then
|
||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.skipWithReferences(
|
||||
"CBI.6.0",
|
||||
"vertebrate but also published_information found",
|
||||
@ -307,10 +306,9 @@ rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published
|
||||
hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator"))
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "published_information"))
|
||||
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.skipWithReferences(
|
||||
"CBI.6.1",
|
||||
"vertebrate but also published_information found",
|
||||
@ -326,8 +324,8 @@ rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published
|
||||
rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables"
|
||||
when
|
||||
$section: Section(!hasTables(),
|
||||
hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(redactionEntity -> {
|
||||
@ -342,7 +340,7 @@ rule "CBI.7.0: Do not redact Names and Addresses if published information found
|
||||
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
|
||||
when
|
||||
$table: Table(hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(redactionEntity -> {
|
||||
@ -522,7 +520,7 @@ rule "CBI.13.0: Ignore CBI Address Recommendations"
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION)
|
||||
then
|
||||
$entity.removeFromGraph();
|
||||
$entity.remove();
|
||||
retract($entity)
|
||||
end
|
||||
|
||||
@ -655,11 +653,13 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
)
|
||||
then
|
||||
RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)");
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.removeFromGraph();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -668,11 +668,13 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.removeFromGraph();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -1143,10 +1145,12 @@ rule "PII.12.0: Expand PII entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addEngine(Engine.RULE);
|
||||
insert(expandedEntity);
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addEngine(Engine.RULE);
|
||||
insert(expandedEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -1229,8 +1233,9 @@ rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confi
|
||||
not FileAttribute(label == "Confidentiality", value == "confidential")
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.removeFromGraph();
|
||||
retract($dossierRedaction);
|
||||
$dossierRedaction.setIgnored(true);
|
||||
update($dossierRedaction);
|
||||
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
@ -1290,6 +1295,8 @@ rule "AI.0.0: add all NER Entities of type CBI_author"
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("CBI_author")
|
||||
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(entity -> insert(entity));
|
||||
end
|
||||
|
||||
@ -1302,6 +1309,8 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
|
||||
then
|
||||
nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.NER);
|
||||
insert(entity);
|
||||
@ -1318,6 +1327,8 @@ rule "AI.2.0: add all NER Entities of any type except CBI_author"
|
||||
nerEntities.getNerEntityList().stream()
|
||||
.filter(nerEntity -> !nerEntity.type().equals("CBI_author"))
|
||||
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, nerEntity.type().toLowerCase(), EntityType.RECOMMENDATION, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(entity -> insert(entity));
|
||||
end
|
||||
|
||||
@ -1334,6 +1345,7 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
@ -1341,21 +1353,27 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.setIgnored(true);
|
||||
update($entityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$imageEntityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$imageEntityToBeRemoved.setIgnored(true);
|
||||
update($imageEntityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
update($imageEntityToBeRemoved.getParent());
|
||||
end
|
||||
|
||||
|
||||
@ -1363,11 +1381,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.2.0: Apply force redaction"
|
||||
salience 128
|
||||
when
|
||||
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
retract($force);
|
||||
end
|
||||
|
||||
|
||||
@ -1375,10 +1398,13 @@ rule "MAN.2.0: Apply force redaction"
|
||||
rule "MAN.3.0: Apply image recategorization"
|
||||
salience 128
|
||||
when
|
||||
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$image: Image($id == id)
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$image.setImageType(ImageType.fromString($imageType));
|
||||
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
|
||||
update($imageToBeRecategorized);
|
||||
update($imageToBeRecategorized.getParent());
|
||||
retract($recategorization);
|
||||
end
|
||||
|
||||
|
||||
@ -1391,7 +1417,7 @@ rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType)
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$contained.removeFromGraph();
|
||||
$contained.remove();
|
||||
retract($contained);
|
||||
end
|
||||
|
||||
@ -1403,12 +1429,13 @@ rule "X.1.0: merge intersecting Entities of same type"
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$first.removeFromGraph();
|
||||
$second.removeFromGraph();
|
||||
$first.remove();
|
||||
$second.remove();
|
||||
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
|
||||
retract($first);
|
||||
retract($second);
|
||||
insert(mergedEntity);
|
||||
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
@ -1419,7 +1446,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$entity.removeFromGraph();
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove();
|
||||
retract($entity)
|
||||
end
|
||||
|
||||
@ -1431,7 +1459,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
@ -1444,7 +1472,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.removeFromGraph();
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
@ -1456,7 +1484,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY)
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
@ -1468,7 +1496,8 @@ rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENT
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY)
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$lowerRank.removeFromGraph();
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove();
|
||||
retract($lowerRank);
|
||||
end
|
||||
|
||||
|
||||
@ -14,6 +14,11 @@ import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SectionIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
|
||||
@ -308,7 +313,18 @@ rule "DOC.5.0: Strain"
|
||||
entity.apply("DOC.5.0", "Strain found.", "n-a");
|
||||
});
|
||||
end
|
||||
|
||||
// Matches a headline containing "positive control substances" whose section
// identifier is a child of a "controls" headline's identifier, which in turn is a
// child of a "materials and methods" headline's identifier (all text checks are
// case-insensitive). Entities are created from the paragraphs of the matched
// headline's parent and applied with rule id DOC.6.0.
rule "DOC.6.0"
    when
        Headline(containsStringIgnoreCase("materials and methods"), $sectionIdentifierMaterials: getSectionIdentifier())
        Headline(containsStringIgnoreCase("controls"), getSectionIdentifier().isChildOf($sectionIdentifierMaterials), $sectionIdentifierControls: getSectionIdentifier())
        $headline: Headline(containsStringIgnoreCase("positive control substances"), getSectionIdentifier().isChildOf($sectionIdentifierControls))
    then
        // The leftover System.out.println($headline) debug statement was removed here.
        // NOTE(review): the entity type "irgendwas" (German for "something") looks like a
        // placeholder - confirm the intended type name before relying on this rule.
        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY)
            .forEach(entity -> {
                entity.apply("DOC.6.0", "positive control substance found", "n-a");
            });
end
|
||||
|
||||
//rule "DOC.7.0: study title by document structure"
|
||||
// when
|
||||
@ -328,7 +344,7 @@ rule "DOC.7.0: study title"
|
||||
when
|
||||
$section: Section(isOnPage(1) && (containsString("Final Report") || containsString("SPL")))
|
||||
then
|
||||
entityCreationService.byRegexWithLinebreaks("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> {
|
||||
entityCreationService.byRegexWithLineBreaks("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> {
|
||||
entity.apply("DOC.7.0", "Title found", "n-a");
|
||||
});
|
||||
entityCreationService.betweenStrings("TITLE", "DATA REQUIREMENT", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> {
|
||||
@ -358,6 +374,8 @@ rule "DOC.8.1: Performing Laboratory (Name)"
|
||||
nerEntities.streamEntitiesOfType("COUNTRY")
|
||||
.filter(nerEntity -> $section.getBoundary().contains(nerEntity.boundary()))
|
||||
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(entity -> {
|
||||
entity.apply("DOC.8.2", "Performing Laboratory found", "n-a");
|
||||
insert(entity);
|
||||
@ -572,8 +590,8 @@ rule "DOC.13.0: Clinical Signs"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_signs", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.13.0", "Clinical Signs found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -591,7 +609,7 @@ rule "DOC.14.0: Dosages"
|
||||
entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
|
||||
entity.apply("DOC.14.0", "Dosage found", "n-a");
|
||||
});
|
||||
entityCreationService.byRegexWithLinebreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> {
|
||||
entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> {
|
||||
entity.apply("DOC.14.0", "Dosage found", "n-a");
|
||||
});
|
||||
end
|
||||
@ -602,8 +620,8 @@ rule "DOC.15.0: Mortality"
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
|
||||
FileAttribute(label == "OECD Number", value == "425")
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent());
|
||||
entity.apply("DOC.15.0", "Mortality found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -615,8 +633,8 @@ rule "DOC.17.0: Study Conclusion"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.17.0", "Study Conclusion found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -634,8 +652,8 @@ rule "DOC.18.0: Weight Behavior Changes"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.18.0", "Weight behavior changes found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -653,8 +671,8 @@ rule "DOC.19.0: Necropsy findings"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "necropsy_findings", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.19.0", "Necropsy section found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY)
|
||||
.forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -673,8 +691,8 @@ rule "DOC.22.0: Clinical observations"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_observations", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.22.0", "Clinical observations section found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -730,8 +748,8 @@ rule "DOC.23.0: Bodyweight changes"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "bodyweight_changes", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.23.0", "Bodyweight section found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -743,8 +761,8 @@ rule "DOC.24.0: Study Design"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_design", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.24.0", "Study design section found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -765,8 +783,8 @@ rule "DOC.25.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "results_and_conclusion", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.25.0", "Results and Conclusion found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_and_conclusion", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -800,8 +818,8 @@ rule "DOC.32.0: Preliminary Test Results (429)"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "preliminary_test_results", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -810,8 +828,8 @@ rule "DOC.33.0: Test Results (429)"
|
||||
FileAttribute(label == "OECD Number", value == "429")
|
||||
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")) && hasParagraphs())
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "test_results", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.33.0", "Test Results found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -946,8 +964,8 @@ rule "DOC.39.0: Dilution of the test substance"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "dilution", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.39.0", "Dilution found.", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -960,8 +978,8 @@ rule "DOC.40.0: Positive Control"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "positive_control", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.40.0", "Positive control found.", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -970,8 +988,8 @@ rule "DOC.42.0: Mortality Statement"
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent());
|
||||
entity.apply("DOC.42.0", "Mortality Statement found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -1043,8 +1061,8 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "doses_(mg_kg_bw)", EntityType.ENTITY, $section);
|
||||
entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a");
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -1090,11 +1108,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.2.0: Apply force redaction"
|
||||
salience 128
|
||||
when
|
||||
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
retract($force);
|
||||
end
|
||||
|
||||
|
||||
|
||||
@ -184,7 +184,6 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
|
||||
then
|
||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.applyWithReferences(
|
||||
"CBI.5.0",
|
||||
"no_redaction_indicator but also redaction_indicator found",
|
||||
@ -202,10 +201,9 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
|
||||
hasEntitiesOfType("redaction_indicator"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator"))
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator"))
|
||||
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.applyWithReferences(
|
||||
"CBI.5.1",
|
||||
"no_redaction_indicator but also redaction_indicator found",
|
||||
@ -471,11 +469,13 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
|
||||
anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
)
|
||||
then
|
||||
RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)");
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.removeFromGraph();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -484,11 +484,13 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.removeFromGraph();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
$entityToExpand.remove();
|
||||
retract($entityToExpand);
|
||||
insert(expandedEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -836,10 +838,12 @@ rule "PII.12.0: Expand PII entities with salutation prefix"
|
||||
when
|
||||
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
|
||||
then
|
||||
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addEngine(Engine.RULE);
|
||||
insert(expandedEntity);
|
||||
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")
|
||||
.ifPresent(expandedEntity -> {
|
||||
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
|
||||
expandedEntity.addEngine(Engine.RULE);
|
||||
insert(expandedEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -909,8 +913,9 @@ rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confi
|
||||
not FileAttribute(label == "Confidentiality", value == "confidential")
|
||||
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
|
||||
then
|
||||
$dossierRedaction.removeFromGraph();
|
||||
retract($dossierRedaction);
|
||||
$dossierRedaction.setIgnored(true);
|
||||
update($dossierRedaction);
|
||||
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
@ -970,6 +975,8 @@ rule "AI.0.0: add all NER Entities of type CBI_author"
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("CBI_author")
|
||||
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(entity -> insert(entity));
|
||||
end
|
||||
|
||||
@ -982,6 +989,8 @@ rule "AI.1.0: combine and add NER Entities as CBI_address"
|
||||
then
|
||||
nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.NER);
|
||||
insert(entity);
|
||||
@ -1001,6 +1010,7 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
@ -1008,21 +1018,27 @@ rule "MAN.0.0: Apply manual resize redaction"
|
||||
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.setIgnored(true);
|
||||
update($entityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
$idRemoval: IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$imageEntityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$imageEntityToBeRemoved.setIgnored(true);
|
||||
update($imageEntityToBeRemoved);
|
||||
retract($idRemoval);
|
||||
update($imageEntityToBeRemoved.getParent());
|
||||
end
|
||||
|
||||
|
||||
@ -1030,11 +1046,16 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to
|
||||
rule "MAN.2.0: Apply force redaction"
|
||||
salience 128
|
||||
when
|
||||
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
|
||||
$entityToForce.setRemoved(false);
|
||||
$entityToForce.setIgnored(false);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
update($entityToForce);
|
||||
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
|
||||
retract($force);
|
||||
end
|
||||
|
||||
|
||||
@ -1042,10 +1063,13 @@ rule "MAN.2.0: Apply force redaction"
|
||||
rule "MAN.3.0: Apply image recategorization"
|
||||
salience 128
|
||||
when
|
||||
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$image: Image($id == id)
|
||||
$recategorization: ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$imageToBeRecategorized: Image($id == id)
|
||||
then
|
||||
$image.setImageType(ImageType.fromString($imageType));
|
||||
$imageToBeRecategorized.setImageType(ImageType.fromString($imageType));
|
||||
update($imageToBeRecategorized);
|
||||
retract($recategorization);
|
||||
update($imageToBeRecategorized.getParent());
|
||||
end
|
||||
|
||||
|
||||
@ -1058,7 +1082,7 @@ rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType)
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$contained.removeFromGraph();
|
||||
$contained.remove();
|
||||
retract($contained);
|
||||
end
|
||||
|
||||
@ -1070,12 +1094,13 @@ rule "X.1.0: merge intersecting Entities of same type"
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$first.removeFromGraph();
|
||||
$second.removeFromGraph();
|
||||
$first.remove();
|
||||
$second.remove();
|
||||
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
|
||||
retract($first);
|
||||
retract($second);
|
||||
insert(mergedEntity);
|
||||
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
@ -1086,7 +1111,8 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$entity.removeFromGraph();
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove();
|
||||
retract($entity)
|
||||
end
|
||||
|
||||
@ -1098,7 +1124,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
@ -1111,7 +1137,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.removeFromGraph();
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
@ -1123,7 +1149,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY)
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
@ -1135,7 +1161,8 @@ rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENT
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY)
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$lowerRank.removeFromGraph();
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove();
|
||||
retract($lowerRank);
|
||||
end
|
||||
|
||||
|
||||
@ -56,9 +56,11 @@ rule "add NER Entities of type CBI_author or CBI_address"
|
||||
when
|
||||
$nerEntity: EntityRecognitionEntity($type: type, (type == "CBI_author" || type == "CBI_address"))
|
||||
then
|
||||
RedactionEntity redactionEntity = entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document);
|
||||
redactionEntity.addEngine(Engine.NER);
|
||||
insert(redactionEntity);
|
||||
entityCreationService.byBoundary(new Boundary($nerEntity.getStartOffset(), $nerEntity.getEndOffset()), $type, EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(redactionEntity -> {
|
||||
redactionEntity.addEngine(Engine.NER);
|
||||
insert(redactionEntity);
|
||||
});
|
||||
end
|
||||
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
@ -81,91 +83,126 @@ rule "Always redact PII"
|
||||
$cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
// --------------------------------------- merging rules -------------------------------------------------------------------
|
||||
//------------------------------------ Entity merging rules ------------------------------------
|
||||
|
||||
rule "remove Entity contained by Entity of same type"
|
||||
// Rule unit: X.0
|
||||
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType)
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$contained.removeFromGraph();
|
||||
$contained.remove();
|
||||
retract($contained);
|
||||
end
|
||||
|
||||
rule "merge intersecting Entities of same type"
|
||||
|
||||
// Rule unit: X.1
|
||||
rule "X.1.0: merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$first.removeFromGraph();
|
||||
$second.removeFromGraph();
|
||||
$first.remove();
|
||||
$second.remove();
|
||||
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
|
||||
retract($first);
|
||||
retract($second);
|
||||
insert(mergedEntity);
|
||||
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
|
||||
// Rule unit: X.2
|
||||
rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
salience 64
|
||||
when
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$entity.removeFromGraph();
|
||||
$entity.getIntersectingNodes().forEach(node -> update(node));
|
||||
$entity.remove();
|
||||
retract($entity)
|
||||
end
|
||||
|
||||
rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
|
||||
// Rule unit: X.3
|
||||
rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
salience 64
|
||||
when
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
rule "remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 64
|
||||
|
||||
// Rule unit: X.4
|
||||
rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY)
|
||||
$recommendation: RedactionEntity(containedBy($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
rule "remove Entity of lower rank, when equal boundaries and entityType"
|
||||
|
||||
// Rule unit: X.5
|
||||
rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY)
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.remove();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: X.6
|
||||
rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY"
|
||||
salience 32
|
||||
when
|
||||
$higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary)
|
||||
$lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !applied)
|
||||
$higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY)
|
||||
$lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$lowerRank.removeFromGraph();
|
||||
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
|
||||
$lowerRank.remove();
|
||||
retract($lowerRank);
|
||||
end
|
||||
|
||||
// --------------------------------------- FileAttribute Rules -------------------------------------------------------------------
|
||||
|
||||
rule "remove duplicate FileAttributes"
|
||||
//------------------------------------ File attributes rules ------------------------------------
|
||||
|
||||
// Rule unit: FA.1
|
||||
rule "FA.1.0: remove duplicate FileAttributes"
|
||||
salience 64
|
||||
when
|
||||
$first: FileAttribute($label: label, $value: value)
|
||||
$second: FileAttribute(this != $first, label == $label, value == $value)
|
||||
$fileAttribute: FileAttribute($label: label, $value: value)
|
||||
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
|
||||
then
|
||||
retract($second);
|
||||
retract($duplicate);
|
||||
end
|
||||
|
||||
// --------------------------------------- local dictionary search -------------------------------------------------------------------
|
||||
|
||||
rule "run local dictionary search"
|
||||
//------------------------------------ Local dictionary search rules ------------------------------------
|
||||
|
||||
// Rule unit: LDS.0
|
||||
rule "LDS.0.0: run local dictionary search"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
salience -999
|
||||
when
|
||||
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
|
||||
then
|
||||
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
|
||||
.forEach(redactionEntity -> insert(redactionEntity));
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
insert(entity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user