Compare commits
44 Commits
master...RED-9859-b
| Author | SHA1 | Date |
|---|---|---|
|  | 557990273d |  |
|  | 63041927fc |  |
|  | 10e0c68a1f |  |
|  | 3ea73aa859 |  |
|  | 179ac6d9ad |  |
|  | bdc6ab7e96 |  |
|  | e959d60ec0 |  |
|  | 6b6d06d24e |  |
|  | 8ac0657795 |  |
|  | dad17bb504 |  |
|  | f445b7fe69 |  |
|  | 0692cc90e4 |  |
|  | ab114b0920 |  |
|  | 7396c04314 |  |
|  | 305cd8f5ac |  |
|  | b4ecbde89e |  |
|  | 7c31d4f70b |  |
|  | f08654a082 |  |
|  | 2cf7f7c7b2 |  |
|  | a51f10b9d1 |  |
|  | 8c36035655 |  |
|  | 67bb4fe7f9 |  |
|  | 2a9101306c |  |
|  | 6ecac11df5 |  |
|  | e663fd2f2a |  |
|  | 0ef4087b36 |  |
|  | bf3ae1606b |  |
|  | 43620f7b52 |  |
|  | 92fc003576 |  |
|  | ed02a83289 |  |
|  | 78f5aaa54e |  |
|  | acb5b4c308 |  |
|  | 61ee1c12ca |  |
|  | abec7ae6bf |  |
|  | afeddb4d91 |  |
|  | 359c237943 |  |
|  | 9789943f45 |  |
|  | f096aab156 |  |
|  | 156b102e87 |  |
|  | 180728721a |  |
|  | fb9d1042ac |  |
|  | 046b4b29b9 |  |
|  | dce797ef8e |  |
|  | 8b8dab2a18 |  |
```diff
@@ -4,7 +4,7 @@ plugins {
 }
 
 description = "redaction-service-api-v1"
 
-val persistenceServiceVersion = "2.439.0"
+val persistenceServiceVersion = "2.465.60"
 
 dependencies {
     implementation("org.springframework:spring-web:6.0.12")
@@ -12,11 +12,11 @@ plugins {
 description = "redaction-service-server-v1"
 
-val layoutParserVersion = "0.141.0"
+val layoutParserVersion = "0.142.6"
 val jacksonVersion = "2.15.2"
 val droolsVersion = "9.44.0.Final"
 val pdfBoxVersion = "3.0.0"
-val persistenceServiceVersion = "2.444.0"
+val persistenceServiceVersion = "2.465.60"
 val springBootStarterVersion = "3.1.5"
 val springCloudVersion = "4.0.4"
 val testContainersVersion = "1.19.7"
@@ -43,6 +43,7 @@ dependencies {
     implementation("com.iqser.red.commons:storage-commons:2.45.0")
     implementation("com.knecon.fforesight:tenant-commons:0.24.0")
     implementation("com.knecon.fforesight:tracing-commons:0.5.0")
+    implementation("com.knecon.fforesight:lifecycle-commons:0.6.0")
 
     implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
     implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
@@ -130,18 +131,19 @@ tasks.named<BootBuildImage>("bootBuildImage") {
     }
 }
 
-fun parseDroolsImports(droolsFilePath: String): List<String> {
+fun parseDroolsImports(vararg droolsFilePaths: String): List<String> {
 
     val imports = mutableListOf<String>()
     val importPattern = Regex("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);")
     val desiredPrefix = "com.iqser.red.service.redaction.v1"
 
-    File(droolsFilePath).forEachLine { line ->
-        importPattern.find(line)?.let { matchResult ->
-            val importPath = matchResult.groupValues[1].trim()
-            if (importPath.startsWith(desiredPrefix)) {
-                val formattedPath = importPath.replace('.', '/')
-                imports.add("$formattedPath.java")
+    droolsFilePaths.forEach { filePath ->
+        File(filePath).forEachLine { line ->
+            importPattern.find(line)?.let { matchResult ->
+                val importPath = matchResult.groupValues[1].trim()
+                if (importPath.startsWith(desiredPrefix)) {
+                    val formattedPath = importPath.replace('.', '/')
+                    imports.add("$formattedPath.java")
+                }
             }
         }
    }
@@ -149,7 +151,11 @@ fun parseDroolsImports(droolsFilePath: String): List<String> {
     return imports
 }
 
-val droolsImports = parseDroolsImports("redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl")
+// Combine imports from both drools files
+val droolsImports = parseDroolsImports(
+    "redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl",
+    "redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_component_rules.drl"
+)
 
 tasks.register("generateJavaDoc", Javadoc::class) {
```
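Reviewer note on the build-script change above: the import scan is a plain regex over the rule files. Below is a minimal Java sketch of the same matching, using hypothetical .drl lines; the production code is the Kotlin function in the diff.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DroolsImportScan {

    // Same pattern as the Gradle script: captures imports under the redaction-service v1 package.
    private static final Pattern IMPORT_PATTERN =
            Pattern.compile("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);");

    public static void main(String[] args) {
        // Hypothetical .drl lines; a real run would read the rule files line by line.
        List<String> lines = List.of(
                "import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;",
                "import java.util.List;"); // ignored: wrong package prefix

        List<String> sources = new ArrayList<>();
        for (String line : lines) {
            Matcher m = IMPORT_PATTERN.matcher(line);
            if (m.find()) {
                // com.iqser.red...PrecursorEntity -> com/iqser/red/.../PrecursorEntity.java
                sources.add(m.group(1).trim().replace('.', '/') + ".java");
            }
        }
        System.out.println(sources);
    }
}
```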
```diff
@@ -13,6 +13,7 @@ import org.springframework.boot.context.properties.EnableConfigurationProperties
 import org.springframework.cache.annotation.EnableCaching;
 import org.springframework.cloud.openfeign.EnableFeignClients;
 import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.EnableAspectJAutoProxy;
 import org.springframework.context.annotation.Import;
 import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
 
@@ -20,6 +21,7 @@ import com.iqser.red.service.dictionarymerge.commons.DictionaryMergeService;
 import com.iqser.red.service.persistence.service.v1.api.shared.mongo.SharedMongoAutoConfiguration;
 import com.iqser.red.service.redaction.v1.server.client.RulesClient;
 import com.iqser.red.storage.commons.StorageAutoConfiguration;
+import com.knecon.fforesight.lifecyclecommons.LifecycleAutoconfiguration;
 import com.knecon.fforesight.mongo.database.commons.MongoDatabaseCommonsAutoConfiguration;
 import com.knecon.fforesight.mongo.database.commons.liquibase.EnableMongoLiquibase;
 import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
@@ -32,13 +34,14 @@ import lombok.extern.slf4j.Slf4j;
 
 @Slf4j
 @EnableCaching
-@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class})
+@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class, LifecycleAutoconfiguration.class})
 @Import({MetricsConfiguration.class, StorageAutoConfiguration.class, MongoDatabaseCommonsAutoConfiguration.class})
 @EnableFeignClients(basePackageClasses = RulesClient.class)
 @EnableConfigurationProperties(RedactionServiceSettings.class)
 @EnableMongoRepositories(basePackages = "com.iqser.red.service.persistence")
 @EnableMongoLiquibase
 @SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, DataSourceAutoConfiguration.class, LiquibaseAutoConfiguration.class, MongoAutoConfiguration.class, MongoDataAutoConfiguration.class})
+@EnableAspectJAutoProxy
 public class Application {
 
     public static void main(String[] args) {
```
```diff
@@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
@@ -119,9 +120,14 @@ public class RedactionLogToEntityLogMigrationService {
             .filter(MigrationEntity::needsManualEntry)
             .map(MigrationEntity::buildManualRedactionEntry)
             .toList();
 
     idsToMigrateInDb.setManualRedactionEntriesToAdd(manualRedactionEntriesToAdd);
 
+    List<String> manualForceRedactionIdsToDelete = entitiesToMigrate.stream()
+            .filter(MigrationEntity::needsForceDeletion)
+            .map(MigrationEntity::getNewId)
+            .toList();
+    idsToMigrateInDb.setForceRedactionIdsToDelete(manualForceRedactionIdsToDelete);
 
     return new MigratedEntityLog(idsToMigrateInDb, entityLog);
 }
```
```diff
@@ -23,6 +23,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualChangeFactory;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
@@ -220,6 +221,11 @@ public final class MigrationEntity {
         && !entityLogEntry.getChanges().isEmpty()
         && entityLogEntry.getChanges().stream().map(Change::getType).toList().get(entityLogEntry.getChanges().size() - 1).equals(ChangeType.REMOVED)) {
         entityLogEntry.setState(EntryState.REMOVED);
+        if (!entityLogEntry.getManualChanges().isEmpty()) {
+            entityLogEntry.getManualChanges()
+                    .removeIf(manualChange -> manualChange.getManualRedactionType()
+                            .equals(com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE));
+        }
     }
 
     return entityLogEntry;
@@ -466,4 +472,10 @@ public final class MigrationEntity {
             .anyMatch(mc -> mc instanceof ManualResizeRedaction && !((ManualResizeRedaction) mc).getUpdateDictionary()) && !(migratedEntity instanceof Image);
 }
 
+public boolean needsForceDeletion() {
+
+    return manualChanges.stream()
+            .anyMatch(mc -> mc instanceof ManualForceRedaction) && this.precursorEntity != null && this.precursorEntity.removed();
+}
+
 }
```
```diff
@@ -120,7 +120,8 @@ public class PrecursorEntity implements IEntity {
     EntityType entityType = getEntityType(entryType);
     String value = Optional.ofNullable(importedRedaction.getValue())
             .orElse("");
-    return PrecursorEntity.builder()
+    PrecursorEntityBuilder precursorEntityBuilder = PrecursorEntity.builder()
             .id(importedRedaction.getId())
             .value(value)
             .entityPosition(rectangleWithPages)
@@ -130,14 +131,21 @@ public class PrecursorEntity implements IEntity {
                 .orElse(""))
             .type(Optional.ofNullable(importedRedaction.getType())
                 .orElse(IMPORTED_REDACTION_TYPE))
-            .section(importedRedaction.getManualOverwriteSection())
+            .section(Optional.ofNullable(importedRedaction.getSection())
+                .orElse(""))
             .entityType(entityType)
             .isDictionaryEntry(false)
             .isDossierDictionaryEntry(false)
-            .manualOverwrite(new ManualChangeOverwrite(entityType))
             .rectangle(value.isBlank() || entryType.equals(EntryType.IMAGE) || entryType.equals(EntryType.IMAGE_HINT) || entryType.equals(EntryType.AREA))
-            .engines(Set.of(Engine.IMPORTED))
-            .build();
+            .manualOverwrite(new ManualChangeOverwrite(entityType, importedRedaction.getManualOverwriteSection()))
+            .engines(Set.of(Engine.IMPORTED));
+
+    if (importedRedaction.getManualOverwriteSection() != null && !importedRedaction.getManualOverwriteSection().isEmpty()) {
+        precursorEntityBuilder.section(importedRedaction.getManualOverwriteSection())
+                .manualOverwrite(new ManualChangeOverwrite(entityType, importedRedaction.getManualOverwriteSection()));
+    }
+
+    return precursorEntityBuilder.build();
 }
```
```diff
@@ -288,8 +288,8 @@ public class DocumentTree {
     if (treeId.isEmpty()) {
         return root;
     }
-    Entry entry = root.children.get(treeId.get(0));
-    for (int id : treeId.subList(1, treeId.size())) {
+    Entry entry = root;
+    for (int id : treeId) {
         entry = entry.children.get(id);
     }
     return entry;
```
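Reviewer note on the `DocumentTree` lookup above: the rewritten loop starts at the root and descends one tree-id component at a time, instead of special-casing the first component. A toy sketch with a stand-in `Entry` type (not the production class):

```java
import java.util.List;
import java.util.Map;

public class TreePathLookup {

    // Minimal stand-in for DocumentTree.Entry: children indexed by their tree-id component.
    record Entry(Map<Integer, Entry> children, String label) {}

    // Mirrors the new loop: start at the root and descend one component at a time.
    static Entry find(Entry root, List<Integer> treeId) {
        if (treeId.isEmpty()) {
            return root;
        }
        Entry entry = root;
        for (int id : treeId) {
            entry = entry.children().get(id);
        }
        return entry;
    }

    public static void main(String[] args) {
        Entry leaf = new Entry(Map.of(), "1.2");
        Entry section = new Entry(Map.of(2, leaf), "1");
        Entry root = new Entry(Map.of(1, section), "root");
        System.out.println(find(root, List.of(1, 2)).label()); // 1.2
    }
}
```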
```diff
@@ -3,8 +3,10 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.stream.Stream;
 
 import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
+import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
 import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
 import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
 
@@ -35,7 +37,7 @@ public class Page {
     Integer width;
     Integer rotation;
 
-    List<SemanticNode> mainBody;
+    List<AtomicTextBlock> textBlocksOnPage;
     Header header;
     Footer footer;
 
@@ -53,13 +55,63 @@ public class Page {
  */
 public TextBlock getMainBodyTextBlock() {
 
-    return mainBody.stream()
-            .filter(SemanticNode::isLeaf)
-            .map(SemanticNode::getTextBlock)
+    return textBlocksOnPage.stream()
+            .filter(atb -> !atb.isEmpty())
             .collect(new TextBlockCollector());
 }
 
 
+/**
+ * Retrieves the highest SemanticNodes which appear only on this page. This is achieved by traversing the DocumentTree up until a SemanticNode's direct parent is no longer exclusively on this page.
+ *
+ * @return A list which contains the highest SemanticNodes that appear only on this page.
+ */
+public List<SemanticNode> getMainBody() {
+
+    return textBlocksOnPage.stream()
+            .map(AtomicTextBlock::getParent)
+            .map(this::getHighestParentOnlyOnPage)
+            .distinct()
+            .toList();
+}
+
+
+/**
+ * Retrieves the highest SemanticNodes which are present on the page. There might be multiple, as two or more main sections can start on one page.
+ * This is achieved by traversing up the document tree and returning all SemanticNodes whose direct parent is the Document.
+ *
+ * @return A stream of the highest SemanticNodes present on this page
+ */
+public Stream<SemanticNode> streamHighestSemanticNodesOnPage() {
+
+    return textBlocksOnPage.stream()
+            .map(AtomicTextBlock::getParent)
+            .map(this::getHighestSemanticNodeOnPage)
+            .distinct();
+}
+
+
+private SemanticNode getHighestParentOnlyOnPage(SemanticNode node) {
+
+    SemanticNode currentNode = node;
+    while (currentNode.hasParent() && currentNode.getParent().onlyOnPage(this)) {
+        currentNode = currentNode.getParent();
+    }
+    return currentNode;
+}
+
+
+private SemanticNode getHighestSemanticNodeOnPage(SemanticNode node) {
+
+    SemanticNode currentNode = node;
+    while (currentNode.hasParent() //
+            && !currentNode.getParent().getType().equals(NodeType.DOCUMENT)) {
+        currentNode = currentNode.getParent();
+    }
+    return currentNode;
+}
+
+
 @Override
 public String toString() {
```
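Reviewer note on the new `Page` helpers: `getHighestParentOnlyOnPage` climbs the tree while the parent is still exclusive to this page, so a paragraph resolves to its largest page-local ancestor. A self-contained sketch with a minimal stand-in node type (assumed, not the real `SemanticNode`):

```java
import java.util.Set;

public class PageTraversalSketch {

    // Minimal stand-in for a SemanticNode: a parent pointer plus the set of pages it spans.
    record Node(Node parent, Set<Integer> pages) {

        boolean onlyOnPage(int page) {
            return pages.size() == 1 && pages.contains(page);
        }
    }

    // Mirrors getHighestParentOnlyOnPage: climb while the parent still sits only on this page.
    static Node highestParentOnlyOnPage(Node node, int page) {
        Node current = node;
        while (current.parent() != null && current.parent().onlyOnPage(page)) {
            current = current.parent();
        }
        return current;
    }

    public static void main(String[] args) {
        Node document = new Node(null, Set.of(1, 2, 3)); // spans several pages
        Node section = new Node(document, Set.of(2));     // only on page 2
        Node paragraph = new Node(section, Set.of(2));
        // The paragraph's highest page-exclusive ancestor is the section, not the document.
        System.out.println(highestParentOnlyOnPage(paragraph, 2) == section); // true
    }
}
```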
```diff
@@ -1,10 +1,12 @@
 package com.iqser.red.service.redaction.v1.server.model.document.nodes;
 
+import java.awt.geom.Rectangle2D;
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
 import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
 import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@@ -33,7 +35,6 @@ import lombok.extern.slf4j.Slf4j;
 @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
 public class Section extends AbstractSemanticNode {
 
-
 @Override
 public NodeType getType() {
@@ -60,7 +61,6 @@ public class Section extends AbstractSemanticNode {
 }
 
-
 @Override
 public String toString() {
@@ -85,7 +85,14 @@ public class Section extends AbstractSemanticNode {
  */
 public boolean anyHeadlineContainsString(String value) {
 
-    return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value));
+    boolean found = streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value)) || getHeadline().containsString(value);
+    if (!found) {
+        List<Headline> previousHeadlines = new ArrayList<>();
+        headlinesByPreviousSibling(this, previousHeadlines);
+        return previousHeadlines.stream()
+                .anyMatch(headline -> headline.containsString(value));
+    }
+    return true;
 }
 
@@ -97,8 +104,37 @@ public class Section extends AbstractSemanticNode {
  */
 public boolean anyHeadlineContainsStringIgnoreCase(String value) {
 
-    return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
+    boolean found = streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value)) || getHeadline().containsStringIgnoreCase(value);
+    if (!found) {
+        List<Headline> previousHeadlines = new ArrayList<>();
+        headlinesByPreviousSibling(this, previousHeadlines);
+        return previousHeadlines.stream()
+                .anyMatch(headline -> headline.containsStringIgnoreCase(value));
+    }
+    return true;
 }
 
+
+private void headlinesByPreviousSibling(SemanticNode section, List<Headline> found) {
+
+    if (section.getPreviousSibling()
+            .isPresent() && section.getPreviousSibling()
+            .get() instanceof Section previousSection) {
+
+        var subnodes = previousSection.streamAllSubNodes()
+                .toList();
+        if (subnodes.size() == 1 && subnodes.get(0) instanceof Headline previousHeadline) {
+            found.add(previousHeadline);
+            headlinesByPreviousSibling(previousSection, found);
+        }
+    }
+
+    if (section.getPreviousSibling()
+            .isPresent() && section.getPreviousSibling()
+            .get() instanceof Headline previousHeadline) {
+        found.add(previousHeadline);
+        headlinesByPreviousSibling(previousHeadline, found);
+    }
+}
 
 }
```
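Reviewer note on the headline fallback: when a section has no matching headline among its own sub-nodes, the new `headlinesByPreviousSibling` walks preceding siblings and adopts a headline that sits alone inside (or directly as) the previous sibling, recursively. A toy illustration of that shape, using stand-in types rather than the production classes:

```java
import java.util.List;

public class HeadlineFallbackSketch {

    sealed interface Node permits Headline, Section {}
    record Headline(String text) implements Node {}
    record Section(List<Node> children) implements Node {}

    public static void main(String[] args) {
        // Layout produced by some parsers: a headline-only "section" followed by
        // the body section it actually belongs to.
        List<Node> siblings = List.of(
                new Section(List.of(new Headline("3. Results"))),
                new Section(List.of())); // the section under test, no headline of its own

        Section current = (Section) siblings.get(1);
        Node previous = siblings.get(0); // previous sibling of `current`

        // Mirrors the fallback: a previous sibling whose only sub-node is a
        // headline contributes that headline to the current section's check.
        boolean headlineFound = previous instanceof Section s
                && s.children().size() == 1
                && s.children().get(0) instanceof Headline h
                && h.text().contains("Results");
        System.out.println(headlineFound); // true
    }
}
```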
```diff
@@ -808,4 +808,17 @@ public interface SemanticNode {
     streamChildren().forEach(childNode -> childNode.accept(visitor));
 }
 
+
+/**
+ * Checks whether this SemanticNode appears on a single page only, and if that page is the provided one.
+ *
+ * @param page the page to check
+ * @return true when the SemanticNode is on a single page only and that page is the provided page; otherwise false.
+ */
+default boolean onlyOnPage(Page page) {
+
+    Set<Page> pages = getPages();
+    return pages.size() == 1 && pages.contains(page);
+}
+
 }
```
```diff
@@ -110,7 +110,14 @@ public class RedactionMessageReceiver {
     log.info("-------------------------------------------------------------------------------------------------");
     shouldRespond = false;
     break;
+
+case IMPORTED_REDACTIONS_ONLY:
+    log.info("------------------------------Imported Redactions Analysis Only------------------------------------------");
+    log.info("Starting Imported Redactions Analysis Only for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
+    log.debug(analyzeRequest.getManualRedactions().toString());
+    result = analyzeService.analyzeImportedRedactionsOnly(analyzeRequest);
+    log.info("Successful Imported Redactions Analysis Only dossier {} file {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+    log.info("-------------------------------------------------------------------------------------------------");
+    break;
 default:
     throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
 }
```
```diff
@@ -23,8 +23,10 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileTyp
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedLegalBases;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.mapper.ImportedLegalBasisMapper;
 import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
 import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
 import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
@@ -76,6 +78,7 @@ public class AnalyzeService {
 ImportedRedactionEntryService importedRedactionEntryService;
 ObservedStorageService observedStorageService;
 FunctionTimerValues redactmanagerAnalyzePagewiseValues;
+ImportedLegalBasisMapper importedLegalBasisMapper = ImportedLegalBasisMapper.INSTANCE;
 
 
 @Timed("redactmanager_reanalyze")
@@ -128,7 +131,7 @@ public class AnalyzeService {
         document,
         document.getNumberOfPages(),
         true,
-        Collections.emptySet());
+        new HashSet<>());
 }
 
 KieWrapper kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
@@ -245,6 +248,39 @@ public class AnalyzeService {
 }
 
 
+@Timed("redactmanager_analyzeImportedRedactionsOnly")
+@Observed(name = "AnalyzeService", contextualName = "analyzeImportedRedactionsOnly")
+public AnalyzeResult analyzeImportedRedactionsOnly(AnalyzeRequest analyzeRequest) {
+
+    long startTime = System.currentTimeMillis();
+
+    dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
+
+    Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
+    log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
+
+    ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+    log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
+
+    ImportedLegalBases importedLegalBases = redactionStorageService.getImportedLegalBases(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+    log.info("Loaded Imported Legal Bases for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
+
+    var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
+
+    EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest, document, notFoundImportedEntries, new DictionaryVersion(0, 0), 0);
+
+    entityLogChanges.getEntityLog()
+            .setLegalBasis(importedLegalBases.getImportedLegalBases()
+                    .stream()
+                    .map(importedLegalBasisMapper::toEntityLogLegalBasis)
+                    .toList());
+
+    notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
+
+    return finalizeAnalysis(analyzeRequest, startTime, KieWrapper.empty(), entityLogChanges, document, document.getNumberOfPages(), false, new HashSet<>());
+}
+
+
 private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
         long startTime,
         KieWrapper kieWrapperComponentRules,
@@ -367,7 +403,7 @@ public class AnalyzeService {
 
 return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
         .stream() //
-        .filter(entry -> sectionsToReanalyseIds.contains(entry.getKey())) //
+        .filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
         .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
 }
 
@@ -383,4 +419,11 @@ public class AnalyzeService {
     return nerEntities;
 }
 
+
+private static Integer getSuperSectionID(String section) {
+
+    return NerEntitiesAdapter.sectionNumberToTreeId(section)
+            .get(0);
+}
+
 }
```
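Reviewer note on the NER filter fix above: entries are keyed by dotted section numbers while the reanalysis set holds top-level ids, so the old full-key comparison could drop entries for nested sections. A small sketch of the corrected comparison (stand-in data; the `sectionNumberToTreeId` logic mirrors the diff):

```java
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class SuperSectionFilterSketch {

    // Mirrors NerEntitiesAdapter.sectionNumberToTreeId: "2.1.3" -> [2, 1, 3]
    static List<Integer> sectionNumberToTreeId(String sectionNumber) {
        return Arrays.stream(sectionNumber.split("\\."))
                .map(Integer::parseInt)
                .toList();
    }

    public static void main(String[] args) {
        Map<String, String> nerData = Map.of("2.1.3", "entities-a", "5.2", "entities-b");
        Set<Integer> sectionsToReanalyse = Set.of(2); // super-section ids

        // The fix compares only the leading component of each dotted key.
        Map<String, String> kept = nerData.entrySet().stream()
                .filter(e -> sectionsToReanalyse.contains(sectionNumberToTreeId(e.getKey()).get(0)))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
        System.out.println(kept.keySet()); // [2.1.3]
    }
}
```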
```diff
@@ -223,7 +223,6 @@ public class EntityLogCreatorService {
 
 String type = precursorEntity.getManualOverwrite().getType()
         .orElse(precursorEntity.getType());
-boolean isHint = isHint(precursorEntity.getEntityType());
 return EntityLogEntry.builder()
         .id(precursorEntity.getId())
         .reason(precursorEntity.buildReasonWithManualChangeDescriptions())
```
```diff
@@ -40,7 +40,8 @@ public class ComponentMappingFileSystemCache {
 public File getComponentMappingFile(ComponentMappingMetadata metadata) {
 
     Path mappingFile = getMappingFileFromMetadata(metadata);
-    Path mappingFileMetaDataFile = mappingFile.resolveSibling(metadata.getName() + METADATA_SUFFIX);
+    Path mappingFileMetaDataFile = getMappingMetadataFileFromMetadata(metadata);
 
     synchronized (ComponentMappingFileSystemCache.class) {
 
         if (fileExistsAndUpToDate(metadata, mappingFile, mappingFileMetaDataFile)) {
@@ -52,7 +53,7 @@ public class ComponentMappingFileSystemCache {
     InputStreamResource inputStreamResource = storageService.getObject(TenantContext.getTenantId(), metadata.getStorageId());
 
     Files.write(mappingFile, inputStreamResource.getContentAsByteArray(), StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE);
 
     mapper.writeValue(mappingFileMetaDataFile.toFile(), metadata);
 
     return mappingFile.toFile();
@@ -87,6 +88,13 @@ public class ComponentMappingFileSystemCache {
 }
 
 
+private Path getMappingMetadataFileFromMetadata(ComponentMappingMetadata metadata) {
+
+    Path tenantStem = mappingFileDir.resolve(TenantContext.getTenantId());
+    return tenantStem.resolve(metadata.getStorageId() + METADATA_SUFFIX);
+}
+
+
 private boolean fileExistsAndUpToDate(ComponentMappingMetadata metadata, Path mappingFile, Path mappingFileMetaDataFile) {
 
     if (mappingFile.toFile().exists() && mappingFile.toFile().isFile() && mappingFileMetaDataFile.toFile().exists() && mappingFileMetaDataFile.toFile().isFile()) {
```
```diff
@@ -4,6 +4,7 @@ import java.io.File;
 import java.io.FileReader;
 import java.io.Reader;
 import java.nio.charset.Charset;
+import java.time.Duration;
 import java.util.List;
 
 import org.springframework.stereotype.Service;
@@ -36,7 +37,7 @@ public class ComponentMappingMemoryCache {
 public ComponentMappingMemoryCache(ComponentMappingFileSystemCache componentMappingFileSystemCache) {
 
     this.fileSystemCache = componentMappingFileSystemCache;
-    cache = CacheBuilder.newBuilder().maximumWeight(MAX_NUMBER_OF_LINES).weigher(COMPONENT_MAPPING_WEIGHER).build();
+    cache = CacheBuilder.newBuilder().maximumWeight(MAX_NUMBER_OF_LINES).weigher(COMPONENT_MAPPING_WEIGHER).expireAfterAccess(Duration.ofDays(1)).build();
 }
```
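Reviewer note on the cache change: `expireAfterAccess` adds time-based eviction on top of the existing weight bound, so rarely-read mappings no longer pin memory indefinitely. A minimal sketch, assuming Guava on the classpath (names and sizes are illustrative):

```java
import java.time.Duration;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

public class CacheExpirySketch {

    public static void main(String[] args) {
        // Entries are dropped when the weight bound is hit *or* once they have
        // not been read or written for a day.
        Cache<String, String> cache = CacheBuilder.newBuilder()
                .maximumWeight(1_000)
                .weigher((String key, String value) -> value.length())
                .expireAfterAccess(Duration.ofDays(1))
                .build();

        cache.put("mapping", "component-a\ncomponent-b");
        System.out.println(cache.getIfPresent("mapping"));
    }
}
```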
```diff
@@ -1,6 +1,5 @@
 package com.iqser.red.service.redaction.v1.server.service.document;
 
-
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -64,7 +63,7 @@ public class DocumentGraphMapper {
 for (DocumentStructure.EntryData entryData : entries) {
 
     List<Page> pages = Arrays.stream(entryData.getPageNumbers())
-            .map(pageNumber -> getPage(pageNumber, context))
+            .map(context::getPage)
             .toList();
 
     SemanticNode node = switch (entryData.getType()) {
@@ -83,6 +82,15 @@ public class DocumentGraphMapper {
     if (entryData.getAtomicBlockIds().length > 0) {
         TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
         node.setLeafTextBlock(textBlock);
+
+        switch (entryData.getType()) {
+            case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
+            case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
+            case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
+            default -> textBlock.getAtomicTextBlocks()
+                    .forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
+        }
+
     }
     List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
             .toList();
@@ -94,13 +102,8 @@ public class DocumentGraphMapper {
     }
     node.setTreeId(treeId);
 
-    switch (entryData.getType()) {
-        case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
-        case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
-        default -> pages.forEach(page -> page.getMainBody().add(node));
-    }
-
     newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildren(), context)).node(node).build());
 
 }
 return newEntries;
 }
@@ -115,7 +118,7 @@ public class DocumentGraphMapper {
 private Image buildImage(Context context, Map<String, String> properties, Long[] pageNumbers) {
 
     assert pageNumbers.length == 1;
-    Page page = getPage(pageNumbers[0], context);
+    Page page = context.getPage(pageNumbers[0]);
     var builder = Image.builder();
     PropertiesMapper.parseImageProperties(properties, builder);
     return builder.documentTree(context.documentTree).page(page).build();
@@ -161,6 +164,7 @@ public class DocumentGraphMapper {
     return SuperSection.builder().documentTree(context.documentTree).build();
 }
 
+
 private Paragraph buildParagraph(Context context, Map<String, String> properties) {
 
     if (PropertiesMapper.isDuplicateParagraph(properties)) {
@@ -189,21 +193,13 @@ public class DocumentGraphMapper {
     return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
             context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
             parent,
-            getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
+            context.getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage()));
 }
 
 
 private Page buildPage(DocumentPage p) {
 
-    return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
-}
-
-
-private Page getPage(Long pageIndex, Context context) {
-
-    Page page = context.pageData.get(Math.toIntExact(pageIndex) - 1);
-    assert page.getNumber() == Math.toIntExact(pageIndex);
-    return page;
+    return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).textBlocksOnPage(new LinkedList<>()).build();
 }
 
 
@@ -226,6 +222,14 @@ public class DocumentGraphMapper {
 
 }
 
+
+private Page getPage(Long pageIndex) {
+
+    Page page = pageData.get(Math.toIntExact(pageIndex) - 1);
+    assert page.getNumber() == Math.toIntExact(pageIndex);
+    return page;
+}
+
 }
 
 }
```
```diff
@@ -614,21 +614,21 @@ public class EntityCreationService {
 /**
  * Looks across the remaining table row to the right of the provided TableCell if any line intersects the y coordinates of the found text.
  *
- * @param TextRanges a list of textRanges
+ * @param textRanges a list of textRanges
  * @param tableCell the table cell
  * @param type the type
  * @param entityType the entity type
  * @param tableNode the table node
  * @return a stream of RedactionEntities
  */
-private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> TextRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
+private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> textRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
 
-    return TextRanges.stream()
+    return textRanges.stream()
             .map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary)))
             .map(bBox -> Pair.of(bBox.getMaxY(), bBox.getMinY()))
             .map(maxMinPair -> tableNode.streamRow(tableCell.getRow())
                 .filter(nextTableCell -> nextTableCell.getCol() > tableCell.getCol())
-                .map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesInYRange(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
+                .map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
                 .map(b -> b.trim(tableNode.getTextBlock()))
                 .filter(boundary -> isValidEntityTextRange(tableNode.getTextBlock(), boundary))
                 .map(boundary -> byTextRange(boundary, type, entityType, tableNode))
@@ -1160,6 +1160,10 @@ public class EntityCreationService {
 
     if (kieSession != null) {
         kieSession.insert(textEntity);
+        textEntity.getIntersectingNodes()
+                .stream()
+                .filter(nodesInKieSession::contains)
+                .forEach(o -> kieSession.update(kieSession.getFactHandle(o), o));
    }
 }
```
```diff
@@ -11,10 +11,12 @@ import java.util.stream.Collectors;
 
 import org.springframework.stereotype.Service;
 
+import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
+import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
 import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
 import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
 import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@@ -35,6 +37,7 @@ public class EntityFromPrecursorCreationService {
 static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities
 EntityFindingUtility entityFindingUtility;
 DictionaryService dictionaryService;
+RedactionServiceSettings settings;
 
 
 public List<PrecursorEntity> createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions manualRedactions, SemanticNode node, String dossierTemplateId) {
@@ -92,7 +95,7 @@ public class EntityFromPrecursorCreationService {
         notFoundEntities.add(precursorEntity);
         continue;
     }
-    createCorrectEntity(precursorEntity, optionalClosestEntity.get());
+    createCorrectEntity(precursorEntity, optionalClosestEntity.get(), settings.isAnnotationMode());
 }
 
 tempEntitiesByValue.values()
@@ -125,12 +128,16 @@ public class EntityFromPrecursorCreationService {
         precursorEntity.getEntityType(),
         closestEntity.getDeepestFullyContainingNode());
 } else {
-    correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(),
-            precursorEntity.type(),
-            precursorEntity.getEntityType(),
-            precursorEntity.getId(),
-            precursorEntity.getManualOverwrite().getSection()
-                    .orElse(null));
+    String section = precursorEntity.getManualOverwrite().getSection()
+            .orElse(null);
+    if ((section == null || section.isBlank())
+            && precursorEntity.getSection() != null
+            && !precursorEntity.getSection().isBlank()
+            && precursorEntity.getEngines().contains(Engine.IMPORTED)) {
+        section = precursorEntity.getSection();
+    }
+
+    correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId(), section);
 }
 correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
 correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes()));
```
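Reviewer note on the section fallback above: the resolution rule is small enough to isolate. A manual overwrite wins; otherwise an entity that came in through the import engine may contribute the section it was imported with. A sketch with stand-in types (not the production `PrecursorEntity`):

```java
public class SectionFallbackSketch {

    // Toy inputs; the real code reads these from PrecursorEntity.
    record Precursor(String manualOverwriteSection, String importedSection, boolean imported) {}

    // Mirrors the new rule in the diff.
    static String resolveSection(Precursor p) {
        String section = p.manualOverwriteSection();
        if ((section == null || section.isBlank())
                && p.importedSection() != null
                && !p.importedSection().isBlank()
                && p.imported()) {
            section = p.importedSection();
        }
        return section;
    }

    public static void main(String[] args) {
        System.out.println(resolveSection(new Precursor(null, "Annex II", true)));  // Annex II
        System.out.println(resolveSection(new Precursor("4.1", "Annex II", true))); // 4.1
    }
}
```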
```diff
@@ -177,7 +177,7 @@ public class NerEntitiesAdapter {
 }
 
 
-private static List<Integer> sectionNumberToTreeId(String sectionNumber) {
+public static List<Integer> sectionNumberToTreeId(String sectionNumber) {
 
     return Arrays.stream(sectionNumber.split("\\."))
             .map(Integer::parseInt)
```
```diff
@@ -11,7 +11,6 @@ import java.util.stream.Stream;
 
 import org.springframework.stereotype.Service;
 
 import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
-import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedaction;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
@@ -27,7 +26,6 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncr
 import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrementValue;
 import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
 import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
-import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
 
 import io.micrometer.core.annotation.Timed;
 import lombok.AccessLevel;
@@ -51,7 +49,6 @@ public class SectionFinderService {
 long start = System.currentTimeMillis();
 Set<Integer> sectionsToReanalyse = new HashSet<>();
 
-
 var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues()
         .stream()
         .map(DictionaryIncrementValue::getValue)
@@ -82,9 +79,7 @@ public class SectionFinderService {
 return document.getPages()
         .stream()
         .filter(page -> relevantPagesForReanalysis.contains(page.getNumber()))
-        .flatMap(page -> Stream.concat(page.getMainBody()
-                .stream()
-                .filter(node -> node.getType().equals(NodeType.SECTION)), Stream.of(page.getHeader(), page.getFooter())))
+        .flatMap(page -> Stream.concat(page.streamHighestSemanticNodesOnPage(), Stream.of(page.getHeader(), page.getFooter())))
         .map(node -> node.getTreeId()
                 .get(0))
         .toList();
```
```diff
@@ -73,9 +73,10 @@ public class ComponentDroolsExecutionService {
 entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset()));
 if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) {
     entry.getDuplicatedTextRanges()
-            .forEach(duplicatedTextRange -> {
-                entities.add(Entity.fromEntityLogEntry(entry, document, duplicatedTextRange.getStart(), duplicatedTextRange.getEnd()));
-            });
+            .forEach(duplicatedTextRange -> entities.add(Entity.fromEntityLogEntry(entry,
+                    document,
+                    duplicatedTextRange.getStart(),
+                    duplicatedTextRange.getEnd())));
 }
 return entities.stream();
 })
@@ -94,8 +95,7 @@ public class ComponentDroolsExecutionService {
 });
 
 try {
-    completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS)
-            .get();
+    completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
 } catch (ExecutionException e) {
     kieSession.dispose();
     if (e.getCause() instanceof TimeoutException) {
@@ -105,6 +105,8 @@ public class ComponentDroolsExecutionService {
 } catch (InterruptedException e) {
     kieSession.dispose();
     throw new RuntimeException(e);
+} catch (TimeoutException e) {
+    throw new DroolsTimeoutException(e, false, RuleFileType.COMPONENT);
 }
 
 List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
```
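Reviewer note on the timeout change (it appears in both Drools executors): `orTimeout(...).get()` surfaces a timeout as an `ExecutionException` wrapping a `TimeoutException`, while `get(timeout, unit)` throws `TimeoutException` directly, which is why the new `catch (TimeoutException e)` branch appears. A standalone demonstration:

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class FutureTimeoutSketch {

    public static void main(String[] args) throws InterruptedException {
        // Variant 1 (old style): orTimeout completes the future exceptionally,
        // so get() wraps the TimeoutException in an ExecutionException.
        try {
            new CompletableFuture<String>().orTimeout(50, TimeUnit.MILLISECONDS).get();
        } catch (ExecutionException e) {
            System.out.println("wrapped: " + e.getCause()); // java.util.concurrent.TimeoutException
        }

        // Variant 2 (new style): get(timeout, unit) throws TimeoutException
        // directly and leaves the future itself untouched.
        try {
            new CompletableFuture<String>().get(50, TimeUnit.MILLISECONDS);
        } catch (ExecutionException e) {
            System.out.println("unreachable for a pure timeout");
        } catch (TimeoutException e) {
            System.out.println("direct: " + e);
        }
    }
}
```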
```diff
@@ -45,7 +45,7 @@ public class DroolsValidationService {
 private final KieContainerCreationService kieContainerCreationService;
 private final DeprecatedElementsFinder deprecatedElementsFinder;
 private static final Pattern allowedImportsPattern = Pattern.compile(
-        "^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red|java\\.util)\\..*;$");
+        "^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red|java\\.util|java\\.text)\\..*;$");
 public static final String LINEBREAK_MATCHER = "\\R";
 
@@ -283,7 +283,9 @@ public class DroolsValidationService {
 
 private DroolsBlacklistErrorMessage checkAndGetBlackListedMessages(SearchImplementation blacklistedKeywordSearchImplementation, String stringToCheck, int lineIndexStart) {
 
-    String sanitizedRuleText = StringUtils.deleteWhitespace(stringToCheck);
+    String nonWhitespaceRuleText = StringUtils.deleteWhitespace(stringToCheck);
+    String sanitizedRuleText = nonWhitespaceRuleText.replaceAll("\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'", "");
 
     List<SearchImplementation.MatchPosition> matches = blacklistedKeywordSearchImplementation.getMatches(sanitizedRuleText);
 
     if (!matches.isEmpty()) {
```
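Reviewer note on the blacklist change: string literals are now stripped before the keyword scan, so a keyword that only appears inside a quoted string no longer trips validation. A quick demo of the added replacement:

```java
public class LiteralStripSketch {

    public static void main(String[] args) {
        // Same replacement as the diff: drop double- and single-quoted literals
        // (including escaped quotes) before the blacklist scan.
        String rule = "when entity.getValue().equals(\"System.exit\") then redact(entity)";
        String stripped = rule.replaceAll("\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'", "");
        System.out.println(stripped); // when entity.getValue().equals() then redact(entity)
    }
}
```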
```diff
@@ -129,8 +129,7 @@ public class EntityDroolsExecutionService {
 });
 
 try {
-    completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS)
-            .get();
+    completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
 } catch (ExecutionException e) {
     kieSession.dispose();
     if (e.getCause() instanceof TimeoutException) {
@@ -140,6 +139,8 @@ public class EntityDroolsExecutionService {
 } catch (InterruptedException e) {
     kieSession.dispose();
     throw new RuntimeException(e);
+} catch (TimeoutException e) {
+    throw new DroolsTimeoutException(e, false, RuleFileType.ENTITY);
 }
 
 List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
```
```diff
@@ -32,7 +32,7 @@ import lombok.experimental.UtilityClass;
 public class RuleFileParser {
 
 private final static Pattern ruleIdentifierInCodeFinder = Pattern.compile(
-        "\\b(?:redact|apply|skip|remove|ignore|applyWithLineBreaks|applyWithReferences|skipWithReferences)\\s*\\(\"([a-zA-Z0-9]+.\\d+.\\d+)\",.*(?:, .*)?\\)");
+        "\\b(?:redact|apply|skip|remove|ignore|applyWithLineBreaks|applyWithReferences|skipWithReferences)\\s*\\(\\s*\"([a-zA-Z0-9]+.\\d+.\\d+)\"\\s*,\\s*.*(?:\\s*,\\s*.*)\\s*?\\)");
 
 
 @SneakyThrows
@@ -78,7 +78,8 @@ public class RuleFileParser {
         .map(GlobalDescr::getLine)
         .orElse(0),
     allQueries,
-    ruleClasses, customDroolsValidation);
+    ruleClasses,
+    customDroolsValidation);
 }
```
```diff
@@ -9,10 +9,12 @@ import java.util.Set;
 import java.util.stream.Collectors;
 
 import org.springframework.cache.annotation.Cacheable;
+import org.springframework.context.annotation.Import;
 import org.springframework.stereotype.Service;
 
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedLegalBases;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactionsPerPage;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@@ -147,6 +149,21 @@ public class RedactionStorageService {
 }
 
 
+@Timed("redactmanager_getImportedLegalBases")
+public ImportedLegalBases getImportedLegalBases(String dossierId, String fileId) {
+
+    try {
+        return storageService.readJSONObject(TenantContext.getTenantId(),
+                StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_LEGAL_BASES),
+                ImportedLegalBases.class);
+    } catch (StorageObjectDoesNotExist e) {
+        log.debug("Imported legal bases not available.");
+        return new ImportedLegalBases();
+    }
+}
+
+
 @Deprecated(forRemoval = true)
 @Timed("redactmanager_getRedactionLog")
 public RedactionLog getRedactionLog(String dossierId, String fileId) {
```
```diff
@@ -18,7 +18,7 @@ public class MigratedIdsCollector implements Collector<MigrationEntity, Migrated
 @Override
 public Supplier<MigratedIds> supplier() {
 
-    return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList());
+    return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList(), Collections.emptyList());
 }
```
```diff
@@ -3,8 +3,10 @@ package com.iqser.red.service.redaction.v1.server.utils;
 import static java.lang.String.format;
 
 import java.awt.geom.Rectangle2D;
+import java.awt.geom.RectangularShape;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.IntStream;
@@ -154,14 +156,8 @@ public class RedactionSearchUtility {
  */
 public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
 
-    List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
-            .map(textBlock::getLineTextRange)
-            .filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary))
-            .toList();
-    if (lineBoundaries.isEmpty()) {
-        return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
-    }
-    return TextRange.merge(lineBoundaries);
+    Predicate<TextRange> isWithinYRange = lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary);
+    return filterLineBoundaries(textBlock, isWithinYRange);
 }
 
 
@@ -172,6 +168,49 @@ public class RedactionSearchUtility {
 }
 
 
+/**
+ * Identifies all lines within a text block that have roughly the same vertical coordinates.
+ *
+ * @param maxY The maximum Y-coordinate of the vertical range.
+ * @param minY The minimum Y-coordinate of the vertical range.
+ * @param textBlock The text block containing the lines to be checked.
+ * @return A {@link TextRange} encompassing all lines within the specified Y-coordinate range.
+ */
+public static TextRange findTextRangesOfAllLinesWithCloseYCoordinates(Double maxY, Double minY, TextBlock textBlock) {
+
+    double averageLineHeight = IntStream.range(0, textBlock.numberOfLines()).boxed()
+            .map(textBlock::getLineTextRange)
+            .flatMap((TextRange stringTextRange) -> textBlock.getPositions(stringTextRange)
+                    .stream())
+            .map(RectangularShape::getHeight)
+            .mapToDouble(Double::doubleValue).average()
+            .orElse(0);
+    Predicate<TextRange> hasCloseYRange = lineBoundary -> areYCoordinatesClose(maxY, minY, textBlock, lineBoundary, averageLineHeight);
+
+    return filterLineBoundaries(textBlock, hasCloseYRange);
+}
+
+
+private static boolean areYCoordinatesClose(Double maxY, Double minY, TextBlock textBlock, TextRange lineTextRange, double averageLineHeight) {
+
+    Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange));
+    return Math.abs(lineBBox.getMinY() - minY) <= averageLineHeight && Math.abs(maxY - lineBBox.getMaxY()) <= averageLineHeight;
+}
+
+
+private static TextRange filterLineBoundaries(TextBlock textBlock, Predicate<TextRange> textRangePredicate) {
+
+    List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
+            .map(textBlock::getLineTextRange)
+            .filter(textRangePredicate)
+            .toList();
+    if (lineBoundaries.isEmpty()) {
+        return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
+    }
+    return TextRange.merge(lineBoundaries);
+}
+
+
 /**
  * Finds TextRanges matching a regex pattern within a TextBlock.
  *
```
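Reviewer note on the new Y-coordinate matching: instead of requiring a line to fall strictly inside the reference band, both edges of a candidate line must now lie within one average line height of it. A condensed sketch of the comparison, with toy coordinates:

```java
import java.awt.geom.Rectangle2D;

public class CloseYSketch {

    // Mirrors areYCoordinatesClose: both edges must lie within one average
    // line height of the reference band [minY, maxY].
    static boolean close(Rectangle2D line, double minY, double maxY, double avgLineHeight) {
        return Math.abs(line.getMinY() - minY) <= avgLineHeight
                && Math.abs(maxY - line.getMaxY()) <= avgLineHeight;
    }

    public static void main(String[] args) {
        double avg = 12.0; // assumed average line height in PDF units
        Rectangle2D reference = new Rectangle2D.Double(50, 700, 200, 12);
        Rectangle2D sameRow = new Rectangle2D.Double(300, 704, 120, 12); // shifted a few units
        Rectangle2D nextRow = new Rectangle2D.Double(300, 660, 120, 12); // a full row away

        System.out.println(close(sameRow, reference.getMinY(), reference.getMaxY(), avg)); // true
        System.out.println(close(nextRow, reference.getMinY(), reference.getMaxY(), avg)); // false
    }
}
```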
```diff
@@ -16,6 +16,9 @@ project.version: 1.0-SNAPSHOT
 server:
   port: 8080
 
+lifecycle:
+  base-package: com.iqser.red.service.redaction
+
 spring:
   application:
     name: redaction-service
```
```diff
@@ -14,6 +14,8 @@ dd MMMM yyyy
 d MMMM yyyy
 MMMM dd, yyyy
 MMMM d, yyyy
+MMMM, d yyyy
+MMMM d,yyyy
 dd.MM.yyyy
 d.MM.yyyy
 yyyy/MM/dd
@@ -28,6 +30,7 @@ dd['.'] MMM yyyy
 d['.'] MMM yyyy
 dd['th']['st']['nd']['rd'] 'of' MMMM, yyyy
 d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
+d['st']['nd']['rd']['th'] MMMM yyyy
 MMMM dd['th']['st']['nd']['rd'], yyyy
 MMMM d['th']['st']['nd']['rd'], yyyy
 yyyy, MMMM dd
@@ -72,3 +75,5 @@ dd.MM.yy
 d.MM.yy
 dd MMM. yyyy
 d MMM. yyyy
+d-MMMM-yyyy
+dd-MMMM-yyyy
```
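Reviewer note on the date-format list: in java.time pattern syntax, `[...]` marks an optional section and `'...'` quotes literal text, so the new ordinal line accepts day numbers with or without a suffix. A quick check, assuming these lines feed `DateTimeFormatter` patterns:

```java
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Locale;

public class OrdinalDateSketch {

    public static void main(String[] args) {
        // [...] = optional section, '...' = literal text (java.time pattern syntax).
        DateTimeFormatter f = DateTimeFormatter.ofPattern("d['st']['nd']['rd']['th'] MMMM yyyy", Locale.ENGLISH);

        System.out.println(LocalDate.parse("3rd March 2021", f)); // 2021-03-03
        System.out.println(LocalDate.parse("21 March 2021", f));  // optional suffix absent
    }
}
```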
```diff
@@ -114,13 +114,14 @@ public abstract class AbstractRedactionIntegrationTest {
 public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
-public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID+ ":" + TEST_DOSSIER_ID;
+public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID + ":" + TEST_DOSSIER_ID;
 public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
+public static final String DOSSIER_AUTHOR_TYPE_ID = AUTHOR_TYPE_ID + ":" + TEST_DOSSIER_ID;
 public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
 public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
 
@@ -250,8 +251,10 @@ public abstract class AbstractRedactionIntegrationTest {
     true));
 when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
     true));
-when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR,
-    true));
+when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
+    PUBLISHED_INFORMATION_INDICATOR,
+    true));
+when(dictionaryClient.getDictionaryForType(DOSSIER_AUTHOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, true));
 
 }
 
@@ -350,6 +353,7 @@ public abstract class AbstractRedactionIntegrationTest {
     .collect(Collectors.toSet()));
 dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
 dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>());
+dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
 
 falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
         .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
```
@ -9,6 +9,7 @@ import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
@ -17,11 +18,13 @@ import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@ -56,7 +59,6 @@ import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
|
||||
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
|
||||
@ -80,7 +82,25 @@ import lombok.extern.slf4j.Slf4j;
|
||||
* This way you can recreate what is happening on the stack almost exactly.
|
||||
*/ public class AnalysisEnd2EndTest {
|
||||
|
||||
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here
|
||||
// These files will be uploaded if they are present in the folder
|
||||
public static final Set<FileType> ENDINGS_TO_UPLOAD = Set.of(FileType.ORIGIN,
|
||||
FileType.DOCUMENT_PAGES,
|
||||
FileType.DOCUMENT_POSITION,
|
||||
FileType.DOCUMENT_STRUCTURE,
|
||||
FileType.DOCUMENT_TEXT,
|
||||
FileType.IMAGE_INFO,
|
||||
FileType.NER_ENTITIES,
|
||||
FileType.TABLES,
|
||||
FileType.IMPORTED_REDACTIONS);
|
||||
|
||||
// These files must be present in the folder or the test will skip the file
|
||||
public static final Set<FileType> REQUIRED_FILES = Set.of(FileType.ORIGIN,
|
||||
FileType.DOCUMENT_PAGES,
|
||||
FileType.DOCUMENT_POSITION,
|
||||
FileType.DOCUMENT_STRUCTURE,
|
||||
FileType.DOCUMENT_TEXT);
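For illustration, a minimal sketch of the skip behaviour the comment above describes; it assumes Guava's Sets.difference, which this test already uses further down, and the example set of uploaded types is invented:

// Hypothetical folder that only yielded two of the required file types.
Set<FileType> uploaded = Set.of(FileType.ORIGIN, FileType.DOCUMENT_PAGES);
// Sets.difference(a, b) is a view of the elements of a that are not in b.
Set<FileType> missing = Sets.difference(REQUIRED_FILES, uploaded);
// missing = {DOCUMENT_POSITION, DOCUMENT_STRUCTURE, DOCUMENT_TEXT}, so the
// file is logged as incomplete and skipped rather than analyzed.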

Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/mainBodyFailed/DOSSIER_TEMPLATE"); // Add your dossier-template here
ObjectMapper mapper = ObjectMapperFactory.create();
final String TENANT_ID = "tenant";

@ -121,7 +141,7 @@ import lombok.extern.slf4j.Slf4j;
@SneakyThrows
public void runAnalysisEnd2End() {

String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
String folder = "/home/kschuettler/Downloads/mainBodyFailed/728d0af4-f4c4-4bc9-acf8-7d2632b02962/"; // Should contain all files from minio directly, still zipped. Can contain multiple files.

Path absoluteFolderPath;
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
@ -133,11 +153,14 @@ import lombok.extern.slf4j.Slf4j;

log.info("Starting end2end analyses for all distinct filenames in folder: {}", folder);
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
log.info("Found {} distinct fileIds", analyzeRequests.size());
log.info("Found {} distinct fileIds with all required files", analyzeRequests.size());
for (int i = 0; i < analyzeRequests.size(); i++) {
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
log.info("----------------------------------------------------------------------------------");
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
analyzeService.analyze(analyzeRequest);
log.info("----------------------------------------------------------------------------------");
log.info("");
}
}

@ -188,22 +211,36 @@ import lombok.extern.slf4j.Slf4j;
@SneakyThrows
private List<AnalyzeRequest> prepareStorageForFolder(Path folder) {

return Files.list(folder)
.map(this::parseFileId)
.distinct()
return findOriginFiles(folder).stream()
.map(fileId -> prepareStorageForFile(fileId, folder))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
}

private String parseFileId(Path path) {
private Set<String> findOriginFiles(Path folder) throws IOException {

return path.getFileName().toString().split("\\.")[0];
return Files.walk(folder)
.map(this::parseFileName)
.filter(Objects::nonNull)
.collect(Collectors.toSet());
}

private String parseFileName(Path path) {

String suffix = ".ORIGIN.pdf";
if (!path.getFileName().toString().endsWith(suffix)) {
return null;
}

return path.getFileName().toString().replace(suffix, "");
}

@SneakyThrows
private AnalyzeRequest prepareStorageForFile(String fileId, Path folder) {
private Optional<AnalyzeRequest> prepareStorageForFile(String fileName, Path folder) {

AnalyzeRequest request = new AnalyzeRequest();
request.setDossierId(UUID.randomUUID().toString());
@ -211,53 +248,65 @@ import lombok.extern.slf4j.Slf4j;
request.setDossierTemplateId(testDossierTemplate.id);
request.setAnalysisNumber(-1);

Path manualRedactionFile = folder.resolve(fileId + ".MANUAL_REDACTIONS.json");
Path manualRedactionFile = folder.resolve(fileName + ".MANUAL_REDACTIONS.json");
if (Files.exists(manualRedactionFile)) {
request.setManualRedactions(mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class));
request.setManualRedactions(parseManualRedactions(manualRedactionFile));
} else {
request.setManualRedactions(new ManualRedactions());
}

Set<FileType> endingsToUpload = Set.of("ORIGIN",
"DOCUMENT_PAGES",
"DOCUMENT_POSITION",
"DOCUMENT_STRUCTURE",
"DOCUMENT_TEXT",
"IMAGE_INFO",
"NER_ENTITIES",
"TABLES",
"IMPORTED_REDACTIONS")
.stream()
.map(FileType::valueOf)
.collect(Collectors.toSet());

Set<FileType> uploadedFileTypes = Files.walk(folder)
.filter(path -> path.toFile().isFile())
.filter(path -> parseFileTypeFromPath(path).map(endingsToUpload::contains)
.orElse(false))
.map(filePath -> uploadFile(filePath, request))
.filter(Optional::isPresent)
.map(Optional::get)
Set<FileType> uploadedFileTypes = findFilesToUpload(fileName, folder, ENDINGS_TO_UPLOAD).map(filePath -> uploadFile(filePath, request))
.map(FileToUpload::fileType)
.collect(Collectors.toUnmodifiableSet());

Set<FileType> missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes);
Set<FileType> missingFileTypes = Sets.difference(REQUIRED_FILES, uploadedFileTypes);

if (!missingFileTypes.isEmpty()) {
log.error("Folder {} is missing files of type {}",
folder.toFile(),
missingFileTypes.stream()
.map(Enum::toString)
.collect(Collectors.joining(", ")));
throw new NotFoundException("Not all required file types are present.");
return Optional.empty();
}
return request;
return Optional.of(request);
}

private static Optional<FileType> parseFileTypeFromPath(Path path) {
private static Stream<FileToUpload> findFilesToUpload(String fileName, Path folder, Set<FileType> endingsToUpload) throws IOException {

return Files.walk(folder)
.filter(path -> path.toFile().isFile())
.map(path -> parseFileTypeFromPath(path, fileName, endingsToUpload))
.filter(Optional::isPresent)
.map(Optional::get);
}

private ManualRedactions parseManualRedactions(Path manualRedactionFile) {

String fileType = path.getFileName().toString().split("\\.")[1];
try {
return Optional.of(FileType.valueOf(fileType));
return mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class);
} catch (IOException e) {
log.error("Could not parse manual redactions");
return new ManualRedactions();
}
}

private static Optional<FileToUpload> parseFileTypeFromPath(Path path, String fileName, Set<FileType> endingsToUpload) {

if (!path.getFileName().toString().startsWith(fileName)) {
return Optional.empty();
}

try {
String fileTypeString = path.getFileName().toString().split("\\.")[1];
FileType fileType = FileType.valueOf(fileTypeString);
if (!endingsToUpload.contains(fileType)) {
return Optional.empty();
}
return Optional.of(new FileToUpload(path, fileType));
} catch (IllegalArgumentException e) {
return Optional.empty();
}
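As orientation for the parsing above, a small sketch of the file-name convention it relies on; the concrete name below is made up, only the "<fileName>.<FILE_TYPE>..." layout comes from the code:

// Hypothetical storage-dump name following the convention parseFileTypeFromPath expects.
String name = "728d0af4.DOCUMENT_TEXT.json.gz";
String fileTypeString = name.split("\\.")[1];          // -> "DOCUMENT_TEXT"
FileType fileType = FileType.valueOf(fileTypeString);  // unknown segments throw IllegalArgumentException,
                                                       // which the method above maps to Optional.empty()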
@ -265,21 +314,26 @@ import lombok.extern.slf4j.Slf4j;

@SneakyThrows
private Optional<FileType> uploadFile(Path path, AnalyzeRequest request) {
private FileToUpload uploadFile(FileToUpload fileToUpload, AnalyzeRequest request) {

Optional<FileType> fileType = parseFileTypeFromPath(path);
if (fileType.isEmpty()) {
return Optional.empty();
if (fileToUpload.path().getFileName().toString().endsWith(".gz")) {
try (var fis = new FileInputStream(fileToUpload.path().toFile()); var in = new GZIPInputStream(fis)) {
storageService.storeObject(TENANT_ID,
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
in);
}
} else {
try (var in = new FileInputStream(fileToUpload.path().toFile())) {
storageService.storeObject(TENANT_ID,
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
in);
}
}
try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis)) {
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in);

}
return fileType;
return fileToUpload;
}

private class TestDossierTemplate {
public class TestDossierTemplate {

String id;
Dictionary testDictionary;
@ -379,4 +433,8 @@ import lombok.extern.slf4j.Slf4j;

}

private record FileToUpload(Path path, FileType fileType) {

}

}

@ -174,6 +174,16 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
expectedDates.add("03/08/1992");
expectedDates.add("13/08/1992");
expectedDates.add("27/02/1992");
expectedDates.add("27/10/1989");
expectedDates.add("07/10/1989");
expectedDates.add("21/08/1998");
expectedDates.add("02/08/1998");
expectedDates.add("01/05/1988");
expectedDates.add("02/06/2003");
expectedDates.add("03/09/2005");
expectedDates.add("06/09/2005");
expectedDates.add("17/08/2005");
expectedDates.add("22/08/2035");

String dates = experimentalDates.getComponentValues()
.get(0).getValue();

@ -107,7 +107,7 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest {
@SneakyThrows
public void testSave() {

MigratedIds ids = new MigratedIds(new LinkedList<>(), null);
MigratedIds ids = new MigratedIds(new LinkedList<>(), null, null);
ids.addMapping("123", "321");
ids.addMapping("123", "321");
ids.addMapping("123", "321");

@ -10,6 +10,7 @@ import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Stream;

import org.junit.jupiter.api.BeforeEach;
@ -25,24 +26,30 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;

import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
@ -50,6 +57,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;

import lombok.SneakyThrows;

@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
@ -103,6 +112,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build(),
Type.builder()
.id(DOSSIER_AUTHOR_TYPE_ID)
.type(DICTIONARY_AUTHOR)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.hexColor("#ffe184")
.isHint(hintTypeMap.get(DICTIONARY_AUTHOR))
.isCaseInsensitive(caseInSensitiveMap.get(DICTIONARY_AUTHOR))
.isRecommendation(recommendationTypeMap.get(DICTIONARY_AUTHOR))
.rank(rankTypeMap.get(DICTIONARY_AUTHOR))
.build(),
Type.builder()
.id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID)
.type(PUBLISHED_INFORMATION_INDICATOR)
@ -158,10 +178,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {

var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow();
assertThat(asyaLyon1.getSection().startsWith("Paragraph:"));
assertThat(asyaLyon1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());

var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@ -212,10 +232,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {

var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());

var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@ -244,6 +264,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
assertThat(dictionary.get(PUBLISHED_INFORMATION_INDICATOR).contains("Press")).isFalse();
}

@Test
public void testPublishedInformationRemovalAtDossierLevel() throws IOException {

@ -348,7 +369,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);

var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY))
.filter(e -> e.getMatchedRule().startsWith("CBI.16"))
.filter(e -> e.getMatchedRule().startsWith("CBI.7"))
.findAny()
.orElseThrow();
IdRemoval removal = buildIdRemoval(desireeEtAl.getId());
@ -365,6 +386,75 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
}

@Test
@SneakyThrows
void testNerEntitiesAfterReanalysis() {

String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));

ClassPathResource responseJson = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.NER_ENTITIES.json");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());

String pdfFile = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.pdf";

AnalyzeRequest request = uploadFileToStorage(pdfFile);

analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
request.setAnalysisNumber(1);
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
mockDictionaryCalls(0L);

analyzeService.analyze(request);

var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);

String nerValue = "Osip S.";
var nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
.orElseThrow();
assertThat(nerEntity.getEngines()).contains(Engine.NER);

String dictionaryAddValue = "cooperation";
ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
.value(dictionaryAddValue)
.type(DICTIONARY_AUTHOR)
.user("user")
.addToDossierDictionary(true)
.positions(List.of(Rectangle.builder().topLeftX(180.748f).topLeftY(546.564f).width(56.592f).height(15.408f).page(1).build()))
.type("dossier_redaction")
.fileId(TEST_FILE_ID)
.requestDate(OffsetDateTime.now())
.annotationId(UUID.randomUUID().toString())
.build();
request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build());

request.setAnalysisNumber(2);
dossierDictionary.get(DICTIONARY_AUTHOR).add(dictionaryAddValue);
reanlysisVersions.put(dictionaryAddValue, 2L);
when(dictionaryClient.getVersionForDossier(TEST_DOSSIER_ID)).thenReturn(2L);
mockDictionaryCalls(1L);

AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);

entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);

EntityLogEntry entityLogEntryAdded = entityLog.getEntityLogEntry()
.stream()
.filter(entityLogEntry -> entityLogEntry.getValue().equals(dictionaryAddValue))
.findFirst()
.get();
assertEquals(EntryState.APPLIED, entityLogEntryAdded.getState());

nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
.orElseThrow();
assertThat(nerEntity.getEngines()).contains(Engine.NER);
dossierDictionary.get(DICTIONARY_AUTHOR).remove(dictionaryAddValue);

}

private static IdRemoval buildIdRemoval(String id) {

return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build();

@ -43,7 +43,17 @@ public class DateConverterTest {
"28 March 2018 (animal 1 - 5000 mg/kg bw)",
"28 March 2018 (animal1 - 5000 mg/kg bw)",
"28 August 2018 (animal 1)",
"31 August 2018 (animal 1)");
"31 August 2018 (animal 1)",
"October, 27 1989",
"October, 7 1989",
"August 21,1998",
"August 2,1998",
"1st May 1988",
"2nd June 2003",
"3rd September 2005",
"6th September 2005",
"17th August 2005",
"22nd August 2035");

for (String dateStr : goldenStandardDates) {
Optional<Date> parsedDate = DateConverter.parseDate(dateStr);
@ -361,7 +361,6 @@ class DroolsValidationServiceTest {
}

@Test
@SneakyThrows
void testRulesWithBlacklistedKeyword() {
@ -379,16 +378,16 @@ class DroolsValidationServiceTest {
String rulesString1 = rulesString.substring(0, indexGlobalStart) + importTenantContext + rulesString.substring(indexGlobalStart);

String evilRulePart1 = """

//------------------------------------ All the evil rules ------------------------------------

// Rule unit: EV.1
rule "EV.1.0: Remove duplicate FileAttributes but also do very evil things"
salience 999
when
$fileAttribute: FileAttribute($label: label, $value: value)
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
""";
""";
String evilRulePart2 = """
then
retract($duplicate);
@ -408,11 +407,13 @@ class DroolsValidationServiceTest {
.forEach(System.out::println);
assertFalse(droolsValidation.isCompiled());
assertEquals(2, droolsValidation.getBlacklistErrorMessages().size());
assertEquals(1, droolsValidation.getBlacklistErrorMessages()
assertEquals(1,
droolsValidation.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().size());
assertTrue(droolsValidation.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().contains("TenantContext"));
assertEquals(2, droolsValidation.getBlacklistErrorMessages()
assertEquals(2,
droolsValidation.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().size());
assertTrue(droolsValidation.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().contains("TenantContext"));
@ -429,14 +430,39 @@ class DroolsValidationServiceTest {
.forEach(System.out::println);
assertFalse(droolsValidation2.isCompiled());
assertEquals(2, droolsValidation2.getBlacklistErrorMessages().size());
assertEquals(1, droolsValidation2.getBlacklistErrorMessages()
assertEquals(1,
droolsValidation2.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().size());
assertTrue(droolsValidation2.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().contains("TenantContext"));
assertEquals(1, droolsValidation2.getBlacklistErrorMessages()
assertEquals(1,
droolsValidation2.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().size());
assertTrue(droolsValidation2.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().contains("System."));
}
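The assertions above only observe the validator's output; as a rough sketch (not the actual DroolsValidationService implementation), the blacklist screening they exercise boils down to a substring scan of the rule text:

// Hypothetical reconstruction of the keyword screen; the real service records
// one BlacklistErrorMessage per offending rule, as the assertions above show.
List<String> blacklist = List.of("TenantContext", "System.");
List<String> hits = blacklist.stream()
        .filter(rulesString1::contains)
        .toList();
// Any hit keeps isCompiled() false and lists the keywords that were found.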

@Test
void assertRuleIdentifierDoesNotMatch() {

String ruleString = RuleManagementResources.getBaseRuleFileString() + """
rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$entity: TextEntity(type() == "CBI_author", dictionaryEntry)
then
$entity.redact(
"CBI.1.0",
"Author found",
"Article 39(e)(3) of Regulation (EC) No 178/2002"
);
end
""";

RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(ruleString);

assertFalse(ruleFileBluePrint.getDroolsValidation().isCompiled());
}

}

@ -129,56 +129,54 @@ rule "CBI.2.0: Do not redact genitive CBI Author"

// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$section: Section(containsString("et al."))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
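Because this "et al." pattern recurs in every CBI.7.x and CBI.16.x rule below, here is a small, self-contained Java demonstration of what its capture group 1 extracts (the sample sentence is invented):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class EtAlRegexDemo {

    public static void main(String[] args) {
        // Same pattern string as passed to entityCreationService.byRegex above.
        Pattern etAl = Pattern.compile("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?");
        Matcher m = etAl.matcher("The study design follows Schmidt JK. et al. and Meier et al.");
        while (m.find()) {
            System.out.println(m.group(1)); // prints "Schmidt JK." and then "Meier"
        }
    }
}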

rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end

rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end

rule "CBI.7.3: Do not redact PII if published information found in same table row"
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end

rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end

@ -289,54 +287,56 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study

// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
when
$section: Section(containsString("et al."))
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end

rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end

rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end

rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.3: Do not redact PII if published information found in same table row"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end

rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
end

@ -312,56 +312,54 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe

// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$section: Section(containsString("et al."))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end

rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end

rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end

rule "CBI.7.3: Do not redact PII if published information found in same table row"
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end

rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end

@ -654,54 +652,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio

// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
when
$section: Section(containsString("et al."))
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end

rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end

rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end

rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.3: Do not redact PII if published information found in same table row"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end

rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
end
@ -129,30 +129,30 @@ rule "CBI.2.0: Do not redact genitive CBI Author"
|
||||
|
||||
|
||||
// Rule unit: CBI.7
|
||||
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
|
||||
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
$section: Section(!hasTables(),
|
||||
hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.skipWithReferences(
|
||||
"CBI.7.0",
|
||||
"Published Information found in section",
|
||||
$section.getEntitiesOfType("published_information")
|
||||
);
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
|
||||
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
@ -224,30 +224,30 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study
|
||||
|
||||
|
||||
// Rule unit: CBI.16
|
||||
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||
$section: Section(containsString("et al."))
|
||||
$section: Section(!hasTables(),
|
||||
hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.skipWithReferences(
|
||||
"CBI.16.0",
|
||||
"Published Information found in section",
|
||||
$section.getEntitiesOfType("published_information")
|
||||
);
|
||||
});
|
||||
end
|
||||
|
||||
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||
$section: Section(containsString("et al."))
|
||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
});
|
||||
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
||||
end
|
||||
|
||||
|
||||
|
||||
@ -214,6 +214,58 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: CBI.7
|
||||
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||
.forEach(entity -> {
|
||||
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
||||
dictionary.recommendEverywhere(entity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: CBI.8
|
||||
rule "CBI.8.0: Redacted because Section contains must_redact entity"
|
||||
when
|
||||
@ -424,58 +476,6 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
end


// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
                dictionary.recommendEverywhere(entity);
            });
end

rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                dictionary.recommendEverywhere(entity);
            });
end

rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                dictionary.recommendEverywhere(entity);
            });
end

rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
                dictionary.recommendEverywhere(entity);
            });
end
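
CBI.16.1 and CBI.16.2 differ only by the `not` in front of the FileAttribute pattern, so exactly one of them fires per matching section, and that choice selects which legal basis is cited (Article 39(e)(3) vs. 39(e)(2) of Regulation (EC) No 178/2002). The `soundslike` operator, which Drools documents as Soundex-based, lets values like "yes" pass the check. A plain-Java sketch of the same gate, assuming commons-codec's Soundex; the helper class is illustrative, not part of the repository:

import org.apache.commons.codec.language.Soundex;

// Hypothetical mirror of:
// FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
public class VertebrateStudyCheck {

    private static final Soundex SOUNDEX = new Soundex();

    static boolean isVertebrateStudy(String attributeValue) {
        return SOUNDEX.soundex(attributeValue).equals(SOUNDEX.soundex("Yes"))
                || attributeValue.toLowerCase().equals("y");
    }

    public static void main(String[] args) {
        System.out.println(isVertebrateStudy("yes")); // true: same Soundex code as "Yes"
        System.out.println(isVertebrateStudy("Y"));   // true: caught by the explicit "y" check
        System.out.println(isVertebrateStudy("No"));  // false
    }
}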


// Rule unit: CBI.17
rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
    when

Binary file not shown.

File diff suppressed because it is too large.
@@ -4,11 +4,13 @@ import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;

import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory;
import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser;
import com.knecon.fforesight.utility.rules.management.models.BasicRule;
import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint;
import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier;
import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO;

import lombok.SneakyThrows;
@@ -21,17 +23,15 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class RuleFileMigrator {


    @SneakyThrows
    public void migrateFile(File ruleFile) {

        RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath()));
        RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles();

        for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) {
            List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier());
            ruleFileBluePrint.removeRule(ruleToReplace.identifier());
            rulesToAdd.forEach(ruleFileBluePrint::addRule);
        }
        //replaceRules(ruleFileBluePrint, combinedBluePrint);
        replaceRuleIdentifiers(combinedBluePrint, ruleFileBluePrint);

        String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint);
        String migratedFilePath = ruleFile.getAbsolutePath();
@@ -40,4 +40,35 @@ public class RuleFileMigrator {
    }
}


    private static void replaceRules(RuleFileBluePrint ruleFileBluePrint, RuleFileBluePrint combinedBluePrint) {

        for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) {
            List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier());
            ruleFileBluePrint.removeRule(ruleToReplace.identifier());
            rulesToAdd.forEach(ruleFileBluePrint::addRule);
        }
    }


    private static void replaceRuleIdentifiers(RuleFileBluePrint combinedBluePrint, RuleFileBluePrint ruleFileBluePrint) {

        Map<String, String> identifierReplaceMap = Map.of("CBI.7.0", "CBI.16.0", "CBI.7.1", "CBI.16.1", "CBI.7.2", "CBI.16.2", "CBI.7.3", "CBI.16.3");
        for (String identifier : identifierReplaceMap.keySet()) {
            RuleIdentifier ruleId = RuleIdentifier.fromString(identifier);
            RuleIdentifier otherRuleId = RuleIdentifier.fromString(identifierReplaceMap.get(identifier));

            List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(otherRuleId);
            List<BasicRule> otherRulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleId);
            boolean removeRules = ruleFileBluePrint.removeRule(ruleId);
            boolean removeOtherRules = ruleFileBluePrint.removeRule(otherRuleId);
            if (removeRules) {
                rulesToAdd.forEach(ruleFileBluePrint::addRule);
            }
            if (removeOtherRules) {
                otherRulesToAdd.forEach(ruleFileBluePrint::addRule);
            }
        }
    }

}
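
replaceRuleIdentifiers swaps each CBI.7.x with its CBI.16.x counterpart: for every pair it looks up both sides in the combined blueprint, removes both identifiers from the file, and re-adds each side only if its counterpart was actually removed, so a file containing only one half of the pair does not silently gain the other. A self-contained sketch of that swap semantics on plain strings; the maps stand in for the project's RuleFileBluePrint/BasicRule model and are not its real API:

import java.util.HashMap;
import java.util.Map;

// "combined" models the canonical rule bodies per identifier,
// "file" models the blueprint of the single rule file being migrated.
public class IdentifierSwapDemo {

    public static void main(String[] args) {
        Map<String, String> combined = Map.of(
                "CBI.7.0", "new: add CBI_author via \"et al.\" regex",
                "CBI.16.0", "new: skip if published information found");

        // The file under migration still uses the old numbering: its
        // published-information rule is registered as CBI.7.0.
        Map<String, String> file = new HashMap<>();
        file.put("CBI.7.0", "old: skip if published information found");

        Map<String, String> replaceMap = Map.of("CBI.7.0", "CBI.16.0");
        replaceMap.forEach((id, otherId) -> {
            boolean removed = file.remove(id) != null;
            boolean removedOther = file.remove(otherId) != null;
            // Each side is re-added under the other identifier, and only if it
            // was present, mirroring the boolean returned by removeRule(...).
            if (removed) {
                file.put(otherId, combined.get(otherId));
            }
            if (removedOther) {
                file.put(id, combined.get(id));
            }
        });

        System.out.println(file); // {CBI.16.0=new: skip if published information found}
    }
}

The rule keeps its meaning but moves to the new identifier, which is exactly the CBI.7/CBI.16 renumbering visible in the DRL diff below.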

@@ -7,15 +7,19 @@ import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

public record RuleFileBluePrint(String imports, String globals, String queries, List<RuleClass> ruleClasses) {

    public void removeRule(RuleIdentifier ruleIdentifier) {
    public boolean removeRule(RuleIdentifier ruleIdentifier) {

        AtomicBoolean wasRemoved = new AtomicBoolean(false);

        findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit())
                .ifPresent(ruleUnit -> {
                    ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier));
                    boolean removed = ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier));
                    wasRemoved.set(removed);
                    if (ruleUnit.rules().isEmpty()) {
                        ruleClass.ruleUnits().remove(ruleUnit);
                    }
@@ -23,7 +27,7 @@ public record RuleFileBluePrint(String imports, String globals, String queries,
                    ruleClasses().remove(ruleClass);
                }
            }));

        return wasRemoved.get();
    }
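
Changing removeRule to return boolean means smuggling a result out of the nested ifPresent lambdas. Locals captured by a lambda must be effectively final, so the code uses an AtomicBoolean as a mutable holder (single-threaded here, so it serves mutability, not atomicity). The pattern in isolation, as a sketch with an Optional standing in for findRuleClassByType(...):

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;

// Standalone illustration of the holder pattern used by removeRule(...) above.
public class LambdaResultDemo {

    static boolean removeIfPresent(Optional<List<String>> maybeRules, String idToRemove) {
        // A plain "boolean" local could not be reassigned inside the lambda,
        // because captured locals must be effectively final.
        AtomicBoolean wasRemoved = new AtomicBoolean(false);
        maybeRules.ifPresent(rules -> wasRemoved.set(rules.removeIf(idToRemove::equals)));
        return wasRemoved.get();
    }

    public static void main(String[] args) {
        List<String> rules = new ArrayList<>(List.of("CBI.7.0", "CBI.7.1"));
        System.out.println(removeIfPresent(Optional.of(rules), "CBI.7.0")); // true
        System.out.println(removeIfPresent(Optional.of(rules), "CBI.9.9")); // false
    }
}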



@@ -312,58 +312,55 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe


// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        $section: Section(!hasTables(),
            hasEntitiesOfType("published_information"),
            (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
        $section: Section(containsString("et al."))
    then
        $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
            .forEach(redactionEntity -> {
                redactionEntity.skipWithReferences(
                    "CBI.7.0",
                    "Published Information found in section",
                    $section.getEntitiesOfType("published_information")
                );
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
                dictionary.recommendEverywhere(entity);
            });
end

rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
        $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
        $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
        $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
        not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
        $section: Section(containsString("et al."))
    then
        $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end

rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
    when
        $section: Section(!hasTables(),
            hasEntitiesOfType("published_information"),
            hasEntitiesOfType("PII"))
    then
        $section.getEntitiesOfType("PII")
            .forEach(redactionEntity -> {
                redactionEntity.skipWithReferences(
                    "CBI.7.2",
                    "Published Information found in section",
                    $section.getEntitiesOfType("published_information")
                );
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                dictionary.recommendEverywhere(entity);
            });
end

rule "CBI.7.3: Do not redact PII if published information found in same table row"
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
        $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
        $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
        $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
        FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
        $section: Section(containsString("et al."))
    then
        $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                dictionary.recommendEverywhere(entity);
            });
end

rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
                dictionary.recommendEverywhere(entity);
            });
end

// Rule unit: CBI.8
rule "CBI.8.0: Redacted because Section contains must_redact entity"
@@ -653,54 +650,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio


// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
    agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
    when
        $section: Section(containsString("et al."))
        $section: Section(!hasTables(),
            hasEntitiesOfType("published_information"),
            (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
                dictionary.recommendEverywhere(entity);
        $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
            .forEach(redactionEntity -> {
                redactionEntity.skipWithReferences(
                    "CBI.16.0",
                    "Published Information found in section",
                    $section.getEntitiesOfType("published_information")
                );
            });
end

rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
    when
        not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
        $section: Section(containsString("et al."))
        $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
        $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
        $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
        $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
                dictionary.recommendEverywhere(entity);
        $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end

rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
    when
        $section: Section(!hasTables(),
            hasEntitiesOfType("published_information"),
            hasEntitiesOfType("PII"))
    then
        $section.getEntitiesOfType("PII")
            .forEach(redactionEntity -> {
                redactionEntity.skipWithReferences(
                    "CBI.16.2",
                    "Published Information found in section",
                    $section.getEntitiesOfType("published_information")
                );
            });
end

rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
    agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.3: Do not redact PII if published information found in same table row"
    when
        FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
        $section: Section(containsString("et al."))
        $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
        $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
        $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
        $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
                dictionary.recommendEverywhere(entity);
            });
end

rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    when
        $section: Section(containsString("et al."))
    then
        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
            .forEach(entity -> {
                entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
                dictionary.recommendEverywhere(entity);
            });
        $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
end


@@ -25,10 +25,12 @@ public class RuleFileMigrationTest {

    // Put your redaction-service drools paths and dossier-templates paths (both RM and DM) here
    static final List<String> ruleFileDirs = List.of(
            "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools",
            "/home/kschuettler/iqser/redaction/dossier-templates-v2",
            "/home/kschuettler/iqser/fforesight/dossier-templates-v2",
            "/home/kschuettler/iqser/business-logic");
            //"/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools",
            // "/Users/maverickstuder/Documents/RedactManager/dossier-templates-v2"
            "/Users/maverickstuder/Documents/PM"

    );
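
These hard-coded developer paths make the test machine-specific. One way to keep it runnable for every contributor is to read the directories from an environment variable and fall back to the checked-in defaults; a sketch, where the RULE_FILE_DIRS variable name and resolver class are assumptions, not something the repository defines:

import java.util.Arrays;
import java.util.List;

// Hypothetical alternative to the hard-coded ruleFileDirs list.
public class RuleFileDirsResolver {

    static List<String> resolveRuleFileDirs(List<String> defaults) {
        String fromEnv = System.getenv("RULE_FILE_DIRS"); // assumed variable name
        if (fromEnv == null || fromEnv.isBlank()) {
            return defaults;
        }
        // Paths separated by the platform path separator (':' on Unix, ';' on Windows).
        return Arrays.asList(fromEnv.split(java.io.File.pathSeparator));
    }
}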


    @Test