RED-6929: Fix Acceptance Tests/Rules

This commit is contained in:
Kilian Schüttler 2023-07-03 17:10:08 +02:00
parent 41d065afaa
commit 5625d1ff01
18 changed files with 1651 additions and 800 deletions

View File

@ -101,10 +101,13 @@ public class RedactionLogEntryAdapter {
redactionLogEntry.getType(),
redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY,
node);
correctEntity.setLegalBasis(redactionLogEntry.getLegalBasis());
correctEntity.setRedactionReason(redactionLogEntry.getReason());
correctEntity.addMatchedRule(redactionLogEntry.getMatchedRule());
correctEntity.setRedaction(redactionLogEntry.isRedacted());
String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";
if (redactionLogEntry.isRedacted()) {
correctEntity.apply(ruleIdentifier, redactionLogEntry.getReason(), redactionLogEntry.getLegalBasis());
} else {
correctEntity.skip(ruleIdentifier, redactionLogEntry.getReason());
}
correctEntity.setDictionaryEntry(redactionLogEntry.isDictionaryEntry());
correctEntity.setDossierDictionaryEntry(redactionLogEntry.isDossierDictionaryEntry());
return correctEntity;

View File

@ -0,0 +1,33 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
/**
 * Outcome of a single rule that matched an entity or image.
 *
 * <p>The natural ordering is a priority ordering: the element that compares as
 * smallest is the rule that wins. Rules of type {@code MAN} sort before rules of
 * any other type; otherwise the rule with the higher unit sorts first, and for
 * equal units the rule with the higher id sorts first. A missing ({@code null})
 * unit or id sorts after every present value.
 *
 * <p>Note: the ordering is not consistent with {@code equals}. Two rules with
 * different non-{@code MAN} types but equal unit and id compare as {@code 0}.
 *
 * @param ruleIdentifier identifier of the rule that matched
 * @param reason         free-text reason attached by the rule
 * @param legalBasis     legal basis attached by the rule; empty for skipped rules
 * @param applied        {@code true} if the rule applied, {@code false} if it skipped
 * @param references     entities the rule referred to when it fired
 */
public record MatchedRule(RuleIdentifier ruleIdentifier, String reason, String legalBasis, boolean applied, Set<RedactionEntity> references) implements Comparable<MatchedRule> {

    /** Rule type that takes precedence over every other rule type. */
    private static final String MANUAL_TYPE = "MAN";


    /**
     * Creates a placeholder for "no rule matched".
     *
     * @return a non-applied rule with an empty identifier, empty texts and no references
     */
    public static MatchedRule empty() {

        return new MatchedRule(RuleIdentifier.empty(), "", "", false, Collections.emptySet());
    }


    /**
     * Compares by priority: {@code MAN} type first, then higher unit, then higher id.
     *
     * @param matchedRule the rule to compare against
     * @return a negative value if this rule has priority over {@code matchedRule},
     *         a positive value if it has lower priority, {@code 0} otherwise
     */
    @Override
    public int compareTo(MatchedRule matchedRule) {

        RuleIdentifier otherRuleIdentifier = matchedRule.ruleIdentifier();
        String ownType = ruleIdentifier.type();
        String otherType = otherRuleIdentifier.type();

        // The type only decides the order when exactly one side is a manual rule.
        if (!Objects.equals(ownType, otherType)) {
            if (Objects.equals(otherType, MANUAL_TYPE)) {
                return 1;
            }
            if (Objects.equals(ownType, MANUAL_TYPE)) {
                return -1;
            }
        }

        int byUnit = compareHighestFirst(ruleIdentifier.unit(), otherRuleIdentifier.unit());
        if (byUnit != 0) {
            return byUnit;
        }
        return compareHighestFirst(ruleIdentifier.id(), otherRuleIdentifier.id());
    }


    /**
     * Orders two possibly-null numbers so that the larger one comes first and
     * {@code null} comes last. Uses {@link Integer#compare(int, int)} instead of
     * subtraction, which avoids both overflow and unboxing a {@code null}.
     */
    private static int compareHighestFirst(Integer own, Integer other) {

        if (own == null && other == null) {
            return 0;
        }
        if (own == null) {
            return 1;
        }
        if (other == null) {
            return -1;
        }
        return Integer.compare(other, own);
    }

}

View File

@ -2,12 +2,13 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.e
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
@ -21,6 +22,7 @@ import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -39,19 +41,18 @@ public class RedactionEntity {
final EntityType entityType;
// empty defaults
boolean redaction;
boolean removed;
boolean ignored;
boolean resized;
boolean skipRemoveEntitiesContainedInLarger;
boolean dictionaryEntry;
boolean dossierDictionaryEntry;
Set<Engine> engines;
Set<RedactionEntity> references;
@Builder.Default
Deque<String> matchedRules = new LinkedList<>();
String redactionReason;
String legalBasis;
Set<Engine> engines = new HashSet<>();
@Builder.Default
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
// inferred on graph insertion
@EqualsAndHashCode.Include
@ -68,7 +69,19 @@ public class RedactionEntity {
public static RedactionEntity initialEntityNode(Boundary boundary, String type, EntityType entityType) {
return RedactionEntity.builder().type(type).entityType(entityType).boundary(boundary).engines(new HashSet<>()).references(new HashSet<>()).build();
return RedactionEntity.builder().type(type).entityType(entityType).boundary(boundary).build();
}
/**
 * Reports whether the rule returned by {@link #getMatchedRule()} was applied.
 *
 * @return the {@code applied} flag of the current matched rule
 */
public boolean isApplied() {
    final MatchedRule currentRule = getMatchedRule();
    return currentRule.applied();
}
/**
 * Returns the references recorded on the rule returned by {@link #getMatchedRule()}.
 *
 * @return the reference set of the current matched rule
 */
public Set<RedactionEntity> getReferences() {
    final MatchedRule currentRule = getMatchedRule();
    return currentRule.references();
}
@ -120,28 +133,60 @@ public class RedactionEntity {
}
public void addMatchedRule(String ruleIdentifier) {
public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) {
matchedRules.add(ruleIdentifier);
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
}
matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet()));
}
/**
 * Records an applied rule together with the entities it referred to.
 *
 * @param ruleIdentifier rule identifier, parsed by {@code RuleIdentifier.fromString}
 * @param comment        reason stored on the matched rule
 * @param legalBasis     legal basis stored on the matched rule; must not be blank
 * @param references     referenced entities; copied into a new set
 * @throws IllegalArgumentException if {@code legalBasis} is blank
 */
public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection<RedactionEntity> references) {
    // isBlank() is also true for the empty string, so one check covers both cases
    if (legalBasis.isBlank()) {
        throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
    }
    RuleIdentifier parsedIdentifier = RuleIdentifier.fromString(ruleIdentifier);
    Set<RedactionEntity> referenceCopy = new HashSet<>(references);
    matchedRuleList.add(new MatchedRule(parsedIdentifier, comment, legalBasis, true, referenceCopy));
}
/**
 * Records a rule that matched but was not applied, without references.
 *
 * @param ruleIdentifier rule identifier, parsed by {@code RuleIdentifier.fromString}
 * @param comment        reason stored on the matched rule
 */
public void skip(@NonNull String ruleIdentifier, String comment) {
    RuleIdentifier parsedIdentifier = RuleIdentifier.fromString(ruleIdentifier);
    MatchedRule skipped = new MatchedRule(parsedIdentifier, comment, "", false, Collections.emptySet());
    matchedRuleList.add(skipped);
}
/**
 * Records a rule that matched but was not applied, together with the entities it referred to.
 *
 * @param ruleIdentifier rule identifier, parsed by {@code RuleIdentifier.fromString}
 * @param comment        reason stored on the matched rule
 * @param references     referenced entities; copied into a new set
 */
public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection<RedactionEntity> references) {
    RuleIdentifier parsedIdentifier = RuleIdentifier.fromString(ruleIdentifier);
    Set<RedactionEntity> referenceCopy = new HashSet<>(references);
    matchedRuleList.add(new MatchedRule(parsedIdentifier, comment, "", false, referenceCopy));
}
/**
 * Adds an already constructed matched rule to this entity's rule queue.
 *
 * @param matchedRule the rule to add
 */
public void addMatchedRule(MatchedRule matchedRule) {
    // PriorityQueue.add delegates to offer, so both calls insert identically
    matchedRuleList.offer(matchedRule);
}
/**
 * Adds every rule of the given collection to this entity's rule queue.
 *
 * @param matchedRules the rules to add
 */
public void addMatchedRules(Collection<MatchedRule> matchedRules) {
matchedRuleList.addAll(matchedRules);
}
public int getMatchedRuleUnit() {
String[] values = getMatchedRule().split("\\.");
if (values.length < 2) {
return -1;
}
return Integer.parseInt(values[1]);
return getMatchedRule().ruleIdentifier().unit();
}
public String getMatchedRule() {
public MatchedRule getMatchedRule() {
if (matchedRules.isEmpty()) {
return "";
if (matchedRuleList.isEmpty()) {
return MatchedRule.empty();
}
return matchedRules.getLast();
return matchedRuleList.peek();
}
@ -202,18 +247,6 @@ public class RedactionEntity {
}
public void addReference(RedactionEntity reference) {
references.add(reference);
}
public void addReferences(List<RedactionEntity> references) {
this.references.addAll(references);
}
public boolean matchesAnnotationId(String manualRedactionId) {
return getRedactionPositionsPerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));

View File

@ -0,0 +1,41 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
import java.util.Objects;
/**
 * Identifier of a rule, written as {@code TYPE.UNIT.ID} (for example {@code CBI.3.1}).
 *
 * @param type rule type, the first segment of the textual form
 * @param unit rule unit, the second segment; {@code null} when unspecified
 * @param id   rule id inside the unit, the third segment; {@code null} when unspecified
 */
public record RuleIdentifier(String type, Integer unit, Integer id) {

    /**
     * Parses an identifier of the form {@code TYPE.UNIT.ID}.
     *
     * @param identifier textual identifier made of exactly three dot-separated segments,
     *                   the last two being integers
     * @return the parsed identifier
     * @throws IllegalArgumentException if the text does not have exactly three segments
     *                                  or the unit or id segment is not an integer
     */
    public static RuleIdentifier fromString(String identifier) {

        // A limit of -1 keeps trailing empty segments, so "CBI.1.0." is seen as four
        // segments and rejected instead of silently losing the trailing dot.
        String[] values = identifier.split("\\.", -1);
        if (values.length != 3) {
            throw new IllegalArgumentException("Illegal rule identifier provided: " + identifier);
        }
        try {
            return new RuleIdentifier(values[0], Integer.parseInt(values[1]), Integer.parseInt(values[2]));
        } catch (NumberFormatException e) {
            // Report the whole identifier, not just the offending segment.
            throw new IllegalArgumentException("Illegal rule identifier provided: " + identifier, e);
        }
    }


    /**
     * Creates the identifier used when no rule is present.
     *
     * @return an identifier with an empty type and no unit or id
     */
    public static RuleIdentifier empty() {

        return new RuleIdentifier("", null, null);
    }


    /**
     * Renders the identifier as {@code TYPE.UNIT.ID}. If only one of unit and id is
     * present, the missing one is rendered as {@code *}; if both are missing, only
     * the type is rendered.
     */
    @Override
    public String toString() {

        boolean hasUnit = Objects.nonNull(unit());
        boolean hasId = Objects.nonNull(id());
        if (!hasUnit && !hasId) {
            return String.valueOf(type());
        }
        String unitPart = hasUnit ? String.valueOf(unit()) : "*";
        String idPart = hasId ? String.valueOf(id()) : "*";
        return type() + "." + unitPart + "." + idPart;
    }

}

View File

@ -1,15 +1,19 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
@ -19,6 +23,7 @@ import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
@Data
@ -35,14 +40,10 @@ public class Image implements GenericSemanticNode {
boolean transparent;
Rectangle2D position;
boolean redaction;
boolean ignored;
@Builder.Default
String redactionReason = "";
@Builder.Default
String legalBasis = "";
@Builder.Default
String matchedRule = "";
PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
@EqualsAndHashCode.Exclude
Page page;
@ -55,6 +56,72 @@ public class Image implements GenericSemanticNode {
Set<RedactionEntity> entities = new HashSet<>();
/**
 * Records an applied rule on this image, without references.
 *
 * @param ruleIdentifier rule identifier, parsed by {@code RuleIdentifier.fromString}
 * @param comment        reason stored on the matched rule
 * @param legalBasis     legal basis stored on the matched rule; must not be blank
 * @throws IllegalArgumentException if {@code legalBasis} is blank
 */
public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) {
    // isBlank() is also true for the empty string, so one check covers both cases
    if (legalBasis.isBlank()) {
        throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
    }
    RuleIdentifier parsedIdentifier = RuleIdentifier.fromString(ruleIdentifier);
    MatchedRule applied = new MatchedRule(parsedIdentifier, comment, legalBasis, true, Collections.emptySet());
    matchedRuleList.add(applied);
}
/**
 * Records an applied rule on this image together with the entities it referred to.
 *
 * @param ruleIdentifier rule identifier, parsed by {@code RuleIdentifier.fromString}
 * @param comment        reason stored on the matched rule
 * @param legalBasis     legal basis stored on the matched rule; must not be blank
 * @param references     referenced entities; copied into a new set
 * @throws IllegalArgumentException if {@code legalBasis} is blank
 */
public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection<RedactionEntity> references) {
    // isBlank() is also true for the empty string, so one check covers both cases
    if (legalBasis.isBlank()) {
        throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity");
    }
    RuleIdentifier parsedIdentifier = RuleIdentifier.fromString(ruleIdentifier);
    Set<RedactionEntity> referenceCopy = new HashSet<>(references);
    matchedRuleList.add(new MatchedRule(parsedIdentifier, comment, legalBasis, true, referenceCopy));
}
/**
 * Records a rule that matched this image but was not applied, without references.
 *
 * @param ruleIdentifier rule identifier, parsed by {@code RuleIdentifier.fromString}
 * @param comment        reason stored on the matched rule
 */
public void skip(@NonNull String ruleIdentifier, String comment) {
    RuleIdentifier parsedIdentifier = RuleIdentifier.fromString(ruleIdentifier);
    MatchedRule skipped = new MatchedRule(parsedIdentifier, comment, "", false, Collections.emptySet());
    matchedRuleList.add(skipped);
}
/**
 * Records a rule that matched this image but was not applied, together with the
 * entities it referred to.
 *
 * @param ruleIdentifier rule identifier, parsed by {@code RuleIdentifier.fromString}
 * @param comment        reason stored on the matched rule
 * @param references     referenced entities; copied into a new set
 */
public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection<RedactionEntity> references) {
    RuleIdentifier parsedIdentifier = RuleIdentifier.fromString(ruleIdentifier);
    Set<RedactionEntity> referenceCopy = new HashSet<>(references);
    matchedRuleList.add(new MatchedRule(parsedIdentifier, comment, "", false, referenceCopy));
}
/**
 * Adds an already constructed matched rule to this image's rule queue.
 *
 * @param matchedRule the rule to add
 */
public void addMatchedRule(MatchedRule matchedRule) {
    // PriorityQueue.add delegates to offer, so both calls insert identically
    matchedRuleList.offer(matchedRule);
}
/**
 * Adds every rule of the given collection to this image's rule queue.
 *
 * @param matchedRules the rules to add
 */
public void addMatchedRules(Collection<MatchedRule> matchedRules) {
matchedRuleList.addAll(matchedRules);
}
/**
 * Reports whether the rule returned by {@link #getMatchedRule()} was applied.
 *
 * @return the {@code applied} flag of the current matched rule
 */
public boolean isApplied() {
    final MatchedRule currentRule = getMatchedRule();
    return currentRule.applied();
}
/**
 * Returns the references recorded on the rule returned by {@link #getMatchedRule()}.
 *
 * @return the reference set of the current matched rule
 */
public Set<RedactionEntity> getReferences() {
    final MatchedRule currentRule = getMatchedRule();
    return currentRule.references();
}
/**
 * Returns the unit number of the current matched rule.
 *
 * @return the unit of the current matched rule, or {@code -1} when no rule has
 *         matched or the rule identifier carries no unit
 */
public int getMatchedRuleUnit() {
    MatchedRule currentRule = getMatchedRule();
    if (currentRule == null) {
        return -1;
    }
    // The unit is a nullable Integer; unboxing it directly would throw for an
    // identifier without a unit.
    Integer unit = currentRule.ruleIdentifier().unit();
    return unit != null ? unit : -1;
}
/**
 * Returns the rule at the head of this image's rule queue.
 *
 * @return the head of the queue, or {@code MatchedRule.empty()} when no rule has
 *         matched, so callers can dereference the result without a null check
 */
public MatchedRule getMatchedRule() {
    MatchedRule head = matchedRuleList.peek();
    return head != null ? head : MatchedRule.empty();
}
@Override
public NodeType getType() {

View File

@ -1,5 +1,22 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex;
import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
@ -14,17 +31,9 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.Re
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex;
import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
@Slf4j
@Service
@ -259,13 +268,9 @@ public class EntityCreationService {
}
RedactionEntity mergedEntity = RedactionEntity.initialEntityNode(Boundary.merge(entitiesToMerge.stream().map(RedactionEntity::getBoundary).toList()), type, entityType);
mergedEntity.setRedaction(entitiesToMerge.stream().anyMatch(RedactionEntity::isRedaction));
mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet()));
entitiesToMerge.stream().map(RedactionEntity::getMatchedRules).flatMap(Collection::stream).forEach(mergedEntity::addMatchedRule);
entitiesToMerge.stream().map(RedactionEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule));
RedactionEntity entityWithHigherRuleNumber = entitiesToMerge.stream().max(Comparator.comparingInt(RedactionEntity::getMatchedRuleUnit)).orElse(entitiesToMerge.get(0));
mergedEntity.setRedactionReason(entityWithHigherRuleNumber.getRedactionReason());
mergedEntity.setLegalBasis(entityWithHigherRuleNumber.getLegalBasis());
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDictionaryEntry));
mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDossierDictionaryEntry));
mergedEntity.setIgnored(entitiesToMerge.stream().allMatch(RedactionEntity::isIgnored));

View File

@ -87,18 +87,18 @@ public class RedactionLogCreatorService {
int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 0 : entity.getDeepestFullyContainingNode().getTreeId().get(0);
return RedactionLogEntry.builder()
.color(getColor(entity.getType(), dossierTemplateId, entity.isRedaction()))
.reason(entity.getRedactionReason())
.legalBasis(entity.getLegalBasis())
.color(getColor(entity.getType(), dossierTemplateId, entity.isApplied()))
.reason(entity.getMatchedRule().reason())
.legalBasis(entity.getMatchedRule().legalBasis())
.value(entity.getValue())
.type(entity.getType())
.redacted(entity.isRedaction())
.redacted(entity.isApplied())
.isHint(isHint(entity.getType(), dossierTemplateId))
.isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(entity.getDeepestFullyContainingNode().toString())
.sectionNumber(sectionNumber)
.matchedRule(entity.getMatchedRule())
.matchedRule(entity.getMatchedRule().ruleIdentifier().toString())
.isDictionaryEntry(entity.isDictionaryEntry())
.textAfter(entity.getTextAfter())
.textBefore(entity.getTextBefore())
@ -116,13 +116,13 @@ public class RedactionLogCreatorService {
String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase();
return RedactionLogEntry.builder()
.id(image.getId())
.color(getColor(image.getImageType().toString(), dossierTemplateId, image.isRedaction()))
.color(getColor(image.getImageType().toString(), dossierTemplateId, image.isApplied()))
.isImage(true)
.type(imageType)
.redacted(image.isRedaction())
.reason(image.getRedactionReason())
.legalBasis(image.getLegalBasis())
.matchedRule(image.getMatchedRule())
.redacted(image.isApplied())
.reason(image.getMatchedRule().reason())
.legalBasis(image.getMatchedRule().legalBasis())
.matchedRule(image.getMatchedRule().ruleIdentifier().toString())
.isHint(dictionaryService.isHint(image.getImageType().toString(), dossierTemplateId))
.isDictionaryEntry(false)
.isRecommendation(false)

View File

@ -7,6 +7,7 @@ import java.io.IOException;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
@ -41,6 +42,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@Test
@Disabled
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A8591B/15-Curacron_ToxicidadeAgudaOral.pdf");
@ -63,12 +65,13 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
}
@Test
@Disabled
public void tableWithEmptyCols() throws IOException {
// FIXME TableNodeFactory: 36, why has table no rows/cols here.
AnalyzeRequest request = prepareStorage("files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.pdf","files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json" );
AnalyzeRequest request = prepareStorage("files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.pdf",
"files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json");
System.out.println("Start Full integration test");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
@ -88,8 +91,6 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
}
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})

View File

@ -222,7 +222,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
@Test
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
System.out.println("Start Full integration test");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
System.out.println("Finished structure analysis");
@ -627,6 +627,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
.annotationId("0b56ea1a87c83f351df177315af94f0d")
.fileId(TEST_FILE_ID)
.status(AnnotationStatus.APPROVED)
.legalBasis("Something")
.requestDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 4653, ZoneOffset.UTC))
.processedDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 794, ZoneOffset.UTC))
.build());

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.document.entity;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;
import org.junit.jupiter.api.Test;
@ -14,12 +15,52 @@ public class RedactionEntityTest {
public void testMatchedRule() {
RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);
entity.addMatchedRule("CBI.1.0");
entity.addMatchedRule("CBI.2.0");
entity.addMatchedRule("CBI.3.0");
entity.addMatchedRule("CBI.4.0");
assertThat(entity.getMatchedRule()).isEqualTo("CBI.4.0");
entity.skip("CBI.1.0", "");
entity.skip("CBI.2.0", "");
entity.skip("CBI.3.0", "");
entity.skip("CBI.4.1", "");
entity.skip("CBI.4.0", "");
assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.4.1");
assertThat(entity.getMatchedRuleUnit()).isEqualTo(4);
}
/**
 * A rule of type MAN must be reported as the matched rule even when rules of
 * another type with higher unit numbers are recorded afterwards.
 */
@Test
public void testMatchedRuleWithManualRedaction() {
    RedactionEntity manuallyTouched = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);
    String[] identifiersInInsertionOrder = {"MAN.2.0", "CBI.2.0", "CBI.3.0", "CBI.4.1", "CBI.4.0"};
    for (String identifier : identifiersInInsertionOrder) {
        manuallyTouched.skip(identifier, "");
    }
    String winningIdentifier = manuallyTouched.getMatchedRule().ruleIdentifier().toString();
    assertThat(winningIdentifier).isEqualTo("MAN.2.0");
    assertThat(manuallyTouched.getMatchedRuleUnit()).isEqualTo(2);
}
/**
 * Malformed rule identifiers and an empty legal basis must be rejected, and a
 * rejected call must not prevent a later valid rule from being recorded.
 */
@Test
public void testMatchedRuleWithNonsense() {
    RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY);

    // malformed identifiers
    assertThrows(IllegalArgumentException.class, () -> entity.skip("", ""));
    assertThrows(IllegalArgumentException.class, () -> entity.skip("CBI.1.*", ""));
    assertThrows(IllegalArgumentException.class, () -> entity.skip("CBI", ""));
    assertThrows(IllegalArgumentException.class, () -> entity.skip("aaaaaaaaaa", ""));

    // valid identifier but empty legal basis
    assertThrows(IllegalArgumentException.class, () -> entity.apply("CBI.0.0", "", ""));

    entity.skip("CBI.2.0", "");
    String recordedIdentifier = entity.getMatchedRule().ruleIdentifier().toString();
    assertThat(recordedIdentifier).isEqualTo("CBI.2.0");
    assertThat(entity.getMatchedRuleUnit()).isEqualTo(2);
}
}

View File

@ -74,4 +74,14 @@ class BoundaryTest {
assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(List.of(12, 40, 100)));
}
/**
 * Checks the sign of {@code compareTo} for one boundary expected to order before
 * and one expected to order after {@code startBoundary}.
 */
@Test
void testCompareTo() {
    Boundary earlier = new Boundary(1, 8);
    Boundary later = new Boundary(101, 102);
    int earlierVersusStart = earlier.compareTo(startBoundary);
    assertEquals(-1, earlierVersusStart);
    int laterVersusStart = later.compareTo(startBoundary);
    assertEquals(1, laterVersusStart);
}
}

View File

@ -1,7 +1,6 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.wildfly.common.Assert.assertFalse;
@ -124,6 +123,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder()
.annotationId(initialId)
.status(AnnotationStatus.APPROVED)
.legalBasis("Something")
.requestDate(OffsetDateTime.now())
.build();
@ -138,10 +138,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
assertFalse(entity.getIntersectingNodes().isEmpty());
assertEquals(1, entity.getPages().size());
assertEquals("David Ksenia", entity.getValue());
assertEquals("Something", entity.getMatchedRule().legalBasis());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertFalse(entity.isRemoved());
assertTrue(entity.isSkipRemoveEntitiesContainedInLarger());
assertTrue(entity.isRedaction());
assertTrue(entity.isApplied());
}
@ -163,12 +164,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
kieSession.fireAllRules();
kieSession.dispose();
assertNull(entity.getDeepestFullyContainingNode());
assertTrue(entity.getIntersectingNodes().isEmpty());
assertTrue(entity.getPages().isEmpty());
assertEquals("David Ksenia", entity.getValue());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertTrue(entity.isRemoved());
assertTrue(entity.isIgnored());
}
@ -185,6 +183,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder()
.annotationId(initialId)
.status(AnnotationStatus.APPROVED)
.legalBasis("Something")
.requestDate(OffsetDateTime.now())
.build();
@ -202,6 +201,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati
assertEquals("David Ksenia", entity.getValue());
assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId());
assertFalse(entity.isRemoved());
assertFalse(entity.isIgnored());
}

View File

@ -255,7 +255,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
.stream()
.filter(entityNode -> !entityNode.isRemoved())
.filter(RedactionEntity::isRedaction)
.filter(RedactionEntity::isApplied)
.flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())
@ -269,7 +269,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
.stream()
.filter(entityNode -> !entityNode.isRemoved())
.filter(entityNode -> !entityNode.isRedaction())
.filter(entityNode -> !entityNode.isApplied())
.flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
.flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream())

View File

@ -48,9 +48,8 @@ query "getFileAttributes"
$fileAttribute: FileAttribute()
end
// --------------------------------------- manual redaction rules -------------------------------------------------------------------
rule "Apply manual resize redaction"
// Rule unit: MAN.0
rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
@ -61,18 +60,19 @@ rule "Apply manual resize redaction"
update($entityToBeResized);
end
rule "Apply id removals that are valid and not in forced redactions to Entity"
// Rule unit: MAN.1
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.removeFromGraph();
retract($entityToBeRemoved);
$entityToBeRemoved.setIgnored(true);
end
rule "Apply id removals that are valid and not in forced redactions to Image"
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
@ -82,18 +82,21 @@ rule "Apply id removals that are valid and not in forced redactions to Image"
$imageEntityToBeRemoved.setIgnored(true);
end
rule "Apply force redaction"
// Rule unit: MAN.2
rule "MAN.2.0: Apply force redaction"
salience 128
when
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
then
$entityToForce.setLegalBasis($legalBasis);
$entityToForce.setRedaction(true);
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
end
rule "Apply image recategorization"
// Rule unit: MAN.3
rule "MAN.3.0: Apply image recategorization"
salience 128
when
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
@ -102,4 +105,18 @@ rule "Apply image recategorization"
$image.setImageType(ImageType.fromString($imageType));
end
//------------------------------------ Local dictionary search rules ------------------------------------
// Rule unit: LDS.0
// Runs each dictionary model's local search over the document and inserts every
// resulting entity into working memory as a RECOMMENDATION created by the rule engine.
rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
// fires once per dictionary model whose localEntries is not empty
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
.forEach(entity -> {
// mark the entity as produced by a rule before handing it to the session
entity.addEngine(Engine.RULE);
insert(entity);
});
end

View File

@ -14,6 +14,12 @@ import java.util.Optional;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
@ -61,8 +67,7 @@ rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)"
entityCreationService.byString("CTL", "must_redact", EntityType.ENTITY, $section),
entityCreationService.byString("BL", "must_redact", EntityType.ENTITY, $section)
).forEach(entity -> {
entity.setRedactionReason("hint_only");
entity.addMatchedRule("SYN.0.0");
entity.skip("SYN.0.0", "hint_only");
entity.addEngine(Engine.RULE);
insert(entity);
});
@ -78,12 +83,13 @@ rule "CBI.3.0: Redacted because Section contains Vertebrate"
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("Vertebrate found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
entity.setRedaction(true);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.3.0");
entity.addReferences($section.getEntitiesOfType("vertebrate"));
entity.applyWithReferences(
"CBI.3.0",
"Vertebrate found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$section.getEntitiesOfType("vertebrate")
);
});
end
@ -94,12 +100,13 @@ rule "CBI.3.1: Redacted because Table Row contains Vertebrate"
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("Vertebrate found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
entity.setRedaction(true);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.3.1");
entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity));
entity.applyWithReferences(
"CBI.3.1",
"Vertebrate found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$table.getEntitiesOfTypeInSameRow("vertebrate", entity)
);
});
end
@ -109,10 +116,8 @@ rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate"
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("No vertebrate found");
entity.setRedaction(false);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.3.2");
entity.skip("CBI.3.2", "No vertebrate found");
});
end
@ -123,10 +128,8 @@ rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate"
$table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("No vertebrate found");
entity.setRedaction(false);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.3.3");
entity.skip("CBI.3.3", "No vertebrate found");
});
end
@ -141,11 +144,12 @@ rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is foun
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("Vertebrate but a no redaction indicator found");
entity.setRedaction(false);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.4.0");
entity.addReferences($section.getEntitiesOfType("no_redaction_indicator"));
entity.skipWithReferences(
"CBI.4.0",
"Vertebrate but a no redaction indicator found",
$section.getEntitiesOfType("no_redaction_indicator")
);
});
end
@ -158,12 +162,14 @@ rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is foun
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("Vertebrate but a no redaction indicator found");
entity.setRedaction(false);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.4.1");
entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity));
entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity));
entity.skipWithReferences(
"CBI.4.1",
"Vertebrate but a no redaction indicator found",
Stream.concat(
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
);
});
end
@ -178,13 +184,15 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
entity.setRedaction(true);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.5.0");
entity.addReferences($section.getEntitiesOfType("no_redaction_indicator"));
entity.addReferences($section.getEntitiesOfType("redaction_indicator"));
entity.applyWithReferences(
"CBI.5.0",
"no_redaction_indicator but also redaction_indicator found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
Stream.concat(
$section.getEntitiesOfType("vertebrate").stream(),
$section.getEntitiesOfType("no_redaction_indicator").stream()).toList()
);
});
end
@ -197,13 +205,15 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
entity.setRedaction(true);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.5.1");
entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity));
entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity));
entity.applyWithReferences(
"CBI.5.1",
"no_redaction_indicator but also redaction_indicator found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
Stream.concat(
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
);
});
end
@ -215,12 +225,13 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity"
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("must_redact entity found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
entity.setRedaction(true);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.8.0");
entity.addReferences($section.getEntitiesOfType("must_redact"));
entity.applyWithReferences(
"CBI.8.0",
"must_redact entity found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$section.getEntitiesOfType("must_redact")
);
});
end
@ -231,12 +242,13 @@ rule "CBI.8.1: Redacted because Table Row contains must_redact entity"
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.setRedactionReason("must_redact entity found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
entity.setRedaction(true);
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.8.1");
entity.addReferences($table.getEntitiesOfTypeInSameRow("must_redact", entity));
entity.applyWithReferences(
"CBI.8.1",
"must_redact entity found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$table.getEntitiesOfTypeInSameRow("must_redact", entity)
);
});
end
@ -253,11 +265,8 @@ rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non verteb
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule("CBI.9.0");
redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
redactionEntity.addEngine(Engine.RULE);
redactionEntity.setRedactionReason("Author(s) found");
redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(redactionEntity);
});
end
@ -273,11 +282,8 @@ rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrat
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule("CBI.9.1");
redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
redactionEntity.addEngine(Engine.RULE);
redactionEntity.setRedactionReason("Author found");
redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(redactionEntity);
});
end
@ -308,8 +314,7 @@ rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author"
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> {
redactionEntity.addMatchedRule("CBI.12.0");
redactionEntity.setRedactionReason("Author(s) header found");
redactionEntity.skip("CBI.12.0", "Author(s) header found");
redactionEntity.addEngine(Engine.RULE);
insert(redactionEntity);
});
@ -322,9 +327,7 @@ rule "CBI.12.1: Dont redact CBI_author, if its row contains a cell with header \
$table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("N", "No"))
.filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address")))
.forEach(authorEntity -> {
authorEntity.setRedaction(false);
authorEntity.setRedactionReason("Not redacted because it's row does not belong to a vertebrate study");
authorEntity.addMatchedRule("CBI.12.1");
authorEntity.skip("CBI.12.1", "Not redacted because it's row does not belong to a vertebrate study");
});
end
@ -335,10 +338,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert
$table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes"))
.filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address")))
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.setRedactionReason("Redacted because it's row belongs to a vertebrate study");
authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
authorEntity.addMatchedRule("CBI.12.2");
authorEntity.apply("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
});
end
@ -348,10 +348,7 @@ rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at
when
$sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at"))
then
$sponsorEntity.setRedaction(true);
$sponsorEntity.setRedactionReason("Redacted because it represents a sponsor company");
$sponsorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
$sponsorEntity.addMatchedRule("CBI.14.0");
$sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
@ -377,10 +374,7 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv
$section.getEntitiesOfType(List.of($keyword, $residueKeyword))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule("CBI.15.0");
redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found.");
redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
redactionEntity.apply("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
});
end
@ -404,10 +398,7 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
$table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword))
.filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address")))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule("CBI.15.1");
redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found.");
redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
redactionEntity.apply("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
});
end
@ -421,13 +412,10 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Author found by \"et al\" regex");
entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
entity.addMatchedRule("CBI.16.0");
entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
entity.addEngine(Engine.RULE);
insert(entity);
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
insert(entity);
});
end
@ -439,10 +427,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)"
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Author found by \"et al\" regex");
entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
entity.addMatchedRule("CBI.16.1");
entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
entity.addEngine(Engine.RULE);
insert(entity);
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
@ -457,9 +442,8 @@ rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, with
then
entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
.forEach(entity -> {
entity.setRedactionReason("Line after \"Source\" in Test Organism Section");
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.17.0");
entity.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section");
insert(entity);
});
end
@ -470,9 +454,8 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with
then
entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
.forEach(entity -> {
entity.setRedactionReason("Line after \"Source:\" in Test Animals Section");
entity.addEngine(Engine.RULE);
entity.addMatchedRule("CBI.17.1");
entity.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section");
insert(entity);
});
end
@ -489,7 +472,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials"
)
then
RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)");
expandedEntity.addMatchedRule("CBI.18.0");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.removeFromGraph();
retract($entityToExpand);
insert(expandedEntity);
@ -502,7 +485,9 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix"
$entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
expandedEntity.addMatchedRule("CBI.19.0");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
$entityToExpand.removeFromGraph();
retract($entityToExpand);
insert(expandedEntity);
end
@ -516,10 +501,8 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
then
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.setRedaction(false);
laboratoryEntity.addMatchedRule("CBI.20.0");
laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study");
laboratoryEntity.addEngine(Engine.RULE);
laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found for non vertebrate study");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
insert(laboratoryEntity);
});
@ -533,11 +516,8 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
then
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.setRedaction(true);
laboratoryEntity.addMatchedRule("CBI.20.1");
laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
laboratoryEntity.addEngine(Engine.RULE);
laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found");
laboratoryEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
insert(laboratoryEntity);
});
@ -552,10 +532,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)"
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
then
$pii.setRedaction(true);
$pii.setRedactionReason("Personal Information found");
$pii.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
$pii.addMatchedRule("PII.0.0");
$pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "PII.0.1: Redact all PII (vertebrate study)"
@ -563,10 +540,7 @@ rule "PII.0.1: Redact all PII (vertebrate study)"
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", dictionaryEntry)
then
$pii.setRedaction(true);
$pii.setRedactionReason("Personal Information found");
$pii.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
$pii.addMatchedRule("PII.0.1");
$pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -578,11 +552,8 @@ rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)"
then
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
.forEach(emailEntity -> {
emailEntity.setRedaction(true);
emailEntity.addEngine(Engine.RULE);
emailEntity.setRedactionReason("Found by Email Regex");
emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
emailEntity.addMatchedRule("PII.1.0");
emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(emailEntity);
});
end
@ -594,11 +565,8 @@ rule "PII.1.1: Redact Emails by RegEx (vertebrate study)"
then
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
.forEach(emailEntity -> {
emailEntity.setRedaction(true);
emailEntity.addEngine(Engine.RULE);
emailEntity.setRedactionReason("Found by Email Regex");
emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
emailEntity.addMatchedRule("PII.1.1");
emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(emailEntity);
});
end
@ -631,11 +599,8 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st
then
entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section)
.forEach(contactEntity -> {
contactEntity.setRedaction(true);
contactEntity.addMatchedRule("PII.4.0");
contactEntity.apply("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)");
contactEntity.addEngine(Engine.RULE);
contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword");
contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)");
insert(contactEntity);
});
end
@ -666,11 +631,8 @@ rule "PII.4.1: Redact line after contact information keywords (non vertebrate st
then
entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section)
.forEach(contactEntity -> {
contactEntity.setRedaction(true);
contactEntity.addMatchedRule("PII.4.1");
contactEntity.apply("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)");
contactEntity.addEngine(Engine.RULE);
contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword");
contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)");
insert(contactEntity);
});
end
@ -687,11 +649,8 @@ rule "PII.6.0: redact line between contact keywords (non vertebrate study)"
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section)
)
.forEach(contactEntity -> {
contactEntity.setRedaction(true);
contactEntity.addMatchedRule("PII.6.0");
contactEntity.apply("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002");
contactEntity.addEngine(Engine.RULE);
contactEntity.setRedactionReason("Found between contact keywords");
contactEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(contactEntity);
});
end
@ -706,11 +665,8 @@ rule "PII.6.1: redact line between contact keywords"
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section)
)
.forEach(contactEntity -> {
contactEntity.setRedaction(true);
contactEntity.addMatchedRule("PII.6.1");
contactEntity.apply("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002");
contactEntity.addEngine(Engine.RULE);
contactEntity.setRedactionReason("Found between contact keywords");
contactEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
insert(contactEntity);
});
end
@ -733,10 +689,7 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section)
))
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Applicant information was found");
entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
entity.addMatchedRule("PII.7.0");
entity.apply("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
entity.addEngine(Engine.RULE);
insert(entity);
});
@ -758,10 +711,7 @@ rule "PII.7.1: Redact contact information if applicant is found (non vertebrate
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section)
))
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Applicant information was found");
entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
entity.addMatchedRule("PII.7.1");
entity.apply("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
entity.addEngine(Engine.RULE);
insert(entity);
});
@ -785,10 +735,7 @@ rule "PII.8.0: Redact contact information if producer is found"
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section)
))
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Producer was found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)");
entity.addMatchedRule("PII.8.0");
entity.apply("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
entity.addEngine(Engine.RULE);
insert(entity);
});
@ -810,10 +757,7 @@ rule "PII.8.1: Redact contact information if producer is found"
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section)
))
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Producer was found");
entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
entity.addMatchedRule("PII.8.1");
entity.apply("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
entity.addEngine(Engine.RULE);
insert(entity);
});
@ -828,11 +772,8 @@ rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebr
then
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule("PII.9.0");
authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
@ -844,11 +785,8 @@ rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non v
then
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule("PII.9.1");
authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
@ -860,11 +798,8 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebr
then
entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule("PII.9.2");
authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
@ -876,11 +811,8 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (verte
then
entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule("PII.9.3");
authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
@ -893,10 +825,7 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:"
then
entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule("PII.11.0");
authorEntity.setRedactionReason("On behalf of Sequani Ltd.: Name Title was found");
authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
authorEntity.apply("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
@ -908,7 +837,8 @@ rule "PII.12.0: Expand PII entities with salutation prefix"
$entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"))
then
RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*");
expandedEntity.addMatchedRule("PII.12.0");
expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList());
expandedEntity.addEngine(Engine.RULE);
insert(expandedEntity);
end
@ -922,11 +852,8 @@ rule "ETC.1.0: Redact Purity"
then
entityCreationService.byRegex("\\bPurity:\\s*(<?>?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.addMatchedRule("ETC.1.0");
entity.apply("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)");
entity.addEngine(Engine.RULE);
entity.setRedaction(true);
entity.setRedactionReason("Purity found");
entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2a)");
});
end
@ -937,10 +864,7 @@ rule "ETC.2.0: Redact signatures (non vertebrate study)"
not FileAttribute(label == "Vertebrate Study", value == "Yes")
$signature: Image(imageType == ImageType.SIGNATURE)
then
$signature.setRedaction(true);
$signature.setMatchedRule("ETC.2.0");
$signature.setRedactionReason("Signature Found");
$signature.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
$signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "ETC.2.0: Redact signatures (vertebrate study)"
@ -948,10 +872,7 @@ rule "ETC.2.0: Redact signatures (vertebrate study)"
FileAttribute(label == "Vertebrate Study", value == "Yes")
$signature: Image(imageType == ImageType.SIGNATURE)
then
$signature.setRedaction(true);
$signature.setMatchedRule("ETC.2.0");
$signature.setRedactionReason("Signature Found");
$signature.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
$signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -961,10 +882,7 @@ rule "ETC.3.0: Redact logos (vertebrate study)"
not FileAttribute(label == "Vertebrate Study", value == "Yes")
$logo: Image(imageType == ImageType.LOGO)
then
$logo.setRedaction(true);
$logo.setMatchedRule("ETC.3.0");
$logo.setRedactionReason("Logo Found");
$logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
$logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "ETC.3.1: Redact logos (non vertebrate study)"
@ -972,10 +890,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)"
FileAttribute(label == "Vertebrate Study", value == "Yes")
$logo: Image(imageType == ImageType.LOGO)
then
$logo.setRedaction(true);
$logo.setMatchedRule("ETC.3.1");
$logo.setRedactionReason("Logo Found");
$logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
$logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -984,10 +899,7 @@ rule "ETC.4.0: Redact dossier dictionary entries"
when
$dossierRedaction: RedactionEntity(type == "dossier_redaction")
then
$dossierRedaction.setRedaction(true);
$dossierRedaction.addMatchedRule("ETC.4.0");
$dossierRedaction.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
$dossierRedaction.setRedactionReason("Specification of impurity found");
$dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
@ -1012,10 +924,7 @@ rule "ETC.6.0: Redact CAS Number"
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule("ETC.6.0");
redactionEntity.setRedactionReason("Sample # found in Header");
redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)");
redactionEntity.apply("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)");
insert(redactionEntity);
});
end
@ -1039,10 +948,7 @@ rule "ETC.8.0: Redact formulas (vertebrate study)"
not FileAttribute(label == "Vertebrate Study", value == "Yes")
$logo: Image(imageType == ImageType.FORMULA)
then
$logo.setRedaction(true);
$logo.setMatchedRule("ETC.8.0");
$logo.setRedactionReason("Logo Found");
$logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
$logo.apply("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "ETC.8.1: Redact formulas (non vertebrate study)"
@ -1050,10 +956,7 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)"
FileAttribute(label == "Vertebrate Study", value == "Yes")
$logo: Image(imageType == ImageType.FORMULA)
then
$logo.setRedaction(true);
$logo.setMatchedRule("ETC.8.1");
$logo.setRedactionReason("Logo Found");
$logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
$logo.apply("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
@ -1130,8 +1033,7 @@ rule "MAN.2.0: Apply force redaction"
ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
then
$entityToForce.setLegalBasis($legalBasis);
$entityToForce.setRedaction(true);
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
end

View File

@ -68,10 +68,7 @@ rule "Always redact CBI_author"
when
$cbiAuthor: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY)
then
$cbiAuthor.addMatchedRule("0");
$cbiAuthor.setRedaction(true);
$cbiAuthor.setRedactionReason("Author found");
$cbiAuthor.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
$cbiAuthor.apply("CBI.0.0", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// --------------------------------------- PII rules -------------------------------------------------------------------
@ -81,10 +78,7 @@ rule "Always redact PII"
when
$cbiAuthor: RedactionEntity(type == "PII", entityType == EntityType.ENTITY)
then
$cbiAuthor.addMatchedRule("1");
$cbiAuthor.setRedaction(true);
$cbiAuthor.setRedactionReason("PII found");
$cbiAuthor.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
$cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// --------------------------------------- merging rules -------------------------------------------------------------------
@ -147,7 +141,7 @@ rule "remove Entity of lower rank, when equal boundaries and entityType"
salience 32
when
$higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary)
$lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !redaction)
$lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !applied)
then
$lowerRank.removeFromGraph();
retract($lowerRank);