Compare commits
44 Commits
master
...
RED-9859-b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
557990273d | ||
|
|
63041927fc | ||
|
|
10e0c68a1f | ||
|
|
3ea73aa859 | ||
|
|
179ac6d9ad | ||
|
|
bdc6ab7e96 | ||
|
|
e959d60ec0 | ||
|
|
6b6d06d24e | ||
|
|
8ac0657795 | ||
|
|
dad17bb504 | ||
|
|
f445b7fe69 | ||
|
|
0692cc90e4 | ||
|
|
ab114b0920 | ||
|
|
7396c04314 | ||
|
|
305cd8f5ac | ||
|
|
b4ecbde89e | ||
|
|
7c31d4f70b | ||
|
|
f08654a082 | ||
|
|
2cf7f7c7b2 | ||
|
|
a51f10b9d1 | ||
|
|
8c36035655 | ||
|
|
67bb4fe7f9 | ||
|
|
2a9101306c | ||
|
|
6ecac11df5 | ||
|
|
e663fd2f2a | ||
|
|
0ef4087b36 | ||
|
|
bf3ae1606b | ||
|
|
43620f7b52 | ||
|
|
92fc003576 | ||
|
|
ed02a83289 | ||
|
|
78f5aaa54e | ||
|
|
acb5b4c308 | ||
|
|
61ee1c12ca | ||
|
|
abec7ae6bf | ||
|
|
afeddb4d91 | ||
|
|
359c237943 | ||
|
|
9789943f45 | ||
|
|
f096aab156 | ||
|
|
156b102e87 | ||
|
|
180728721a | ||
|
|
fb9d1042ac | ||
|
|
046b4b29b9 | ||
|
|
dce797ef8e | ||
|
|
8b8dab2a18 |
@ -4,7 +4,7 @@ plugins {
|
|||||||
}
|
}
|
||||||
|
|
||||||
description = "redaction-service-api-v1"
|
description = "redaction-service-api-v1"
|
||||||
val persistenceServiceVersion = "2.439.0"
|
val persistenceServiceVersion = "2.465.60"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.springframework:spring-web:6.0.12")
|
implementation("org.springframework:spring-web:6.0.12")
|
||||||
|
|||||||
@ -12,11 +12,11 @@ plugins {
|
|||||||
description = "redaction-service-server-v1"
|
description = "redaction-service-server-v1"
|
||||||
|
|
||||||
|
|
||||||
val layoutParserVersion = "0.141.0"
|
val layoutParserVersion = "0.142.6"
|
||||||
val jacksonVersion = "2.15.2"
|
val jacksonVersion = "2.15.2"
|
||||||
val droolsVersion = "9.44.0.Final"
|
val droolsVersion = "9.44.0.Final"
|
||||||
val pdfBoxVersion = "3.0.0"
|
val pdfBoxVersion = "3.0.0"
|
||||||
val persistenceServiceVersion = "2.444.0"
|
val persistenceServiceVersion = "2.465.60"
|
||||||
val springBootStarterVersion = "3.1.5"
|
val springBootStarterVersion = "3.1.5"
|
||||||
val springCloudVersion = "4.0.4"
|
val springCloudVersion = "4.0.4"
|
||||||
val testContainersVersion = "1.19.7"
|
val testContainersVersion = "1.19.7"
|
||||||
@ -43,6 +43,7 @@ dependencies {
|
|||||||
implementation("com.iqser.red.commons:storage-commons:2.45.0")
|
implementation("com.iqser.red.commons:storage-commons:2.45.0")
|
||||||
implementation("com.knecon.fforesight:tenant-commons:0.24.0")
|
implementation("com.knecon.fforesight:tenant-commons:0.24.0")
|
||||||
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
|
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
|
||||||
|
implementation("com.knecon.fforesight:lifecycle-commons:0.6.0")
|
||||||
|
|
||||||
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
|
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
|
||||||
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
|
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
|
||||||
@ -130,18 +131,19 @@ tasks.named<BootBuildImage>("bootBuildImage") {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun parseDroolsImports(droolsFilePath: String): List<String> {
|
fun parseDroolsImports(vararg droolsFilePaths: String): List<String> {
|
||||||
|
|
||||||
val imports = mutableListOf<String>()
|
val imports = mutableListOf<String>()
|
||||||
val importPattern = Regex("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);")
|
val importPattern = Regex("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);")
|
||||||
val desiredPrefix = "com.iqser.red.service.redaction.v1"
|
val desiredPrefix = "com.iqser.red.service.redaction.v1"
|
||||||
|
|
||||||
File(droolsFilePath).forEachLine { line ->
|
droolsFilePaths.forEach { filePath ->
|
||||||
importPattern.find(line)?.let { matchResult ->
|
File(filePath).forEachLine { line ->
|
||||||
val importPath = matchResult.groupValues[1].trim()
|
importPattern.find(line)?.let { matchResult ->
|
||||||
if (importPath.startsWith(desiredPrefix)) {
|
val importPath = matchResult.groupValues[1].trim()
|
||||||
val formattedPath = importPath.replace('.', '/')
|
if (importPath.startsWith(desiredPrefix)) {
|
||||||
imports.add("$formattedPath.java")
|
val formattedPath = importPath.replace('.', '/')
|
||||||
|
imports.add("$formattedPath.java")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -149,7 +151,11 @@ fun parseDroolsImports(droolsFilePath: String): List<String> {
|
|||||||
return imports
|
return imports
|
||||||
}
|
}
|
||||||
|
|
||||||
val droolsImports = parseDroolsImports("redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl")
|
// Combine imports from both drools files
|
||||||
|
val droolsImports = parseDroolsImports(
|
||||||
|
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl",
|
||||||
|
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_component_rules.drl"
|
||||||
|
)
|
||||||
|
|
||||||
tasks.register("generateJavaDoc", Javadoc::class) {
|
tasks.register("generateJavaDoc", Javadoc::class) {
|
||||||
|
|
||||||
|
|||||||
@ -13,6 +13,7 @@ import org.springframework.boot.context.properties.EnableConfigurationProperties
|
|||||||
import org.springframework.cache.annotation.EnableCaching;
|
import org.springframework.cache.annotation.EnableCaching;
|
||||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.EnableAspectJAutoProxy;
|
||||||
import org.springframework.context.annotation.Import;
|
import org.springframework.context.annotation.Import;
|
||||||
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
|
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
|
||||||
|
|
||||||
@ -20,6 +21,7 @@ import com.iqser.red.service.dictionarymerge.commons.DictionaryMergeService;
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.SharedMongoAutoConfiguration;
|
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.SharedMongoAutoConfiguration;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||||
|
import com.knecon.fforesight.lifecyclecommons.LifecycleAutoconfiguration;
|
||||||
import com.knecon.fforesight.mongo.database.commons.MongoDatabaseCommonsAutoConfiguration;
|
import com.knecon.fforesight.mongo.database.commons.MongoDatabaseCommonsAutoConfiguration;
|
||||||
import com.knecon.fforesight.mongo.database.commons.liquibase.EnableMongoLiquibase;
|
import com.knecon.fforesight.mongo.database.commons.liquibase.EnableMongoLiquibase;
|
||||||
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
|
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
|
||||||
@ -32,13 +34,14 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@EnableCaching
|
@EnableCaching
|
||||||
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class})
|
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class, LifecycleAutoconfiguration.class})
|
||||||
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, MongoDatabaseCommonsAutoConfiguration.class})
|
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, MongoDatabaseCommonsAutoConfiguration.class})
|
||||||
@EnableFeignClients(basePackageClasses = RulesClient.class)
|
@EnableFeignClients(basePackageClasses = RulesClient.class)
|
||||||
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
||||||
@EnableMongoRepositories(basePackages = "com.iqser.red.service.persistence")
|
@EnableMongoRepositories(basePackages = "com.iqser.red.service.persistence")
|
||||||
@EnableMongoLiquibase
|
@EnableMongoLiquibase
|
||||||
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, DataSourceAutoConfiguration.class, LiquibaseAutoConfiguration.class, MongoAutoConfiguration.class, MongoDataAutoConfiguration.class})
|
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, DataSourceAutoConfiguration.class, LiquibaseAutoConfiguration.class, MongoAutoConfiguration.class, MongoDataAutoConfiguration.class})
|
||||||
|
@EnableAspectJAutoProxy
|
||||||
public class Application {
|
public class Application {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
|||||||
@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||||
@ -119,9 +120,14 @@ public class RedactionLogToEntityLogMigrationService {
|
|||||||
.filter(MigrationEntity::needsManualEntry)
|
.filter(MigrationEntity::needsManualEntry)
|
||||||
.map(MigrationEntity::buildManualRedactionEntry)
|
.map(MigrationEntity::buildManualRedactionEntry)
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
idsToMigrateInDb.setManualRedactionEntriesToAdd(manualRedactionEntriesToAdd);
|
idsToMigrateInDb.setManualRedactionEntriesToAdd(manualRedactionEntriesToAdd);
|
||||||
|
|
||||||
|
List<String> manualForceRedactionIdsToDelete = entitiesToMigrate.stream()
|
||||||
|
.filter(MigrationEntity::needsForceDeletion)
|
||||||
|
.map(MigrationEntity::getNewId)
|
||||||
|
.toList();
|
||||||
|
idsToMigrateInDb.setForceRedactionIdsToDelete(manualForceRedactionIdsToDelete);
|
||||||
|
|
||||||
return new MigratedEntityLog(idsToMigrateInDb, entityLog);
|
return new MigratedEntityLog(idsToMigrateInDb, entityLog);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -23,6 +23,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualChangeFactory;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualChangeFactory;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||||
@ -220,6 +221,11 @@ public final class MigrationEntity {
|
|||||||
&& !entityLogEntry.getChanges().isEmpty()
|
&& !entityLogEntry.getChanges().isEmpty()
|
||||||
&& entityLogEntry.getChanges().stream().map(Change::getType).toList().get(entityLogEntry.getChanges().size() - 1).equals(ChangeType.REMOVED)) {
|
&& entityLogEntry.getChanges().stream().map(Change::getType).toList().get(entityLogEntry.getChanges().size() - 1).equals(ChangeType.REMOVED)) {
|
||||||
entityLogEntry.setState(EntryState.REMOVED);
|
entityLogEntry.setState(EntryState.REMOVED);
|
||||||
|
if (!entityLogEntry.getManualChanges().isEmpty()) {
|
||||||
|
entityLogEntry.getManualChanges()
|
||||||
|
.removeIf(manualChange -> manualChange.getManualRedactionType()
|
||||||
|
.equals(com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return entityLogEntry;
|
return entityLogEntry;
|
||||||
@ -466,4 +472,10 @@ public final class MigrationEntity {
|
|||||||
.anyMatch(mc -> mc instanceof ManualResizeRedaction && !((ManualResizeRedaction) mc).getUpdateDictionary()) && !(migratedEntity instanceof Image);
|
.anyMatch(mc -> mc instanceof ManualResizeRedaction && !((ManualResizeRedaction) mc).getUpdateDictionary()) && !(migratedEntity instanceof Image);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean needsForceDeletion() {
|
||||||
|
|
||||||
|
return manualChanges.stream()
|
||||||
|
.anyMatch(mc -> mc instanceof ManualForceRedaction) && this.precursorEntity != null && this.precursorEntity.removed();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -120,7 +120,8 @@ public class PrecursorEntity implements IEntity {
|
|||||||
EntityType entityType = getEntityType(entryType);
|
EntityType entityType = getEntityType(entryType);
|
||||||
String value = Optional.ofNullable(importedRedaction.getValue())
|
String value = Optional.ofNullable(importedRedaction.getValue())
|
||||||
.orElse("");
|
.orElse("");
|
||||||
return PrecursorEntity.builder()
|
|
||||||
|
PrecursorEntityBuilder precursorEntityBuilder = PrecursorEntity.builder()
|
||||||
.id(importedRedaction.getId())
|
.id(importedRedaction.getId())
|
||||||
.value(value)
|
.value(value)
|
||||||
.entityPosition(rectangleWithPages)
|
.entityPosition(rectangleWithPages)
|
||||||
@ -130,14 +131,21 @@ public class PrecursorEntity implements IEntity {
|
|||||||
.orElse(""))
|
.orElse(""))
|
||||||
.type(Optional.ofNullable(importedRedaction.getType())
|
.type(Optional.ofNullable(importedRedaction.getType())
|
||||||
.orElse(IMPORTED_REDACTION_TYPE))
|
.orElse(IMPORTED_REDACTION_TYPE))
|
||||||
.section(importedRedaction.getManualOverwriteSection())
|
.section(Optional.ofNullable(importedRedaction.getSection())
|
||||||
|
.orElse(""))
|
||||||
.entityType(entityType)
|
.entityType(entityType)
|
||||||
.isDictionaryEntry(false)
|
.isDictionaryEntry(false)
|
||||||
.isDossierDictionaryEntry(false)
|
.isDossierDictionaryEntry(false)
|
||||||
|
.manualOverwrite(new ManualChangeOverwrite(entityType))
|
||||||
.rectangle(value.isBlank() || entryType.equals(EntryType.IMAGE) || entryType.equals(EntryType.IMAGE_HINT) || entryType.equals(EntryType.AREA))
|
.rectangle(value.isBlank() || entryType.equals(EntryType.IMAGE) || entryType.equals(EntryType.IMAGE_HINT) || entryType.equals(EntryType.AREA))
|
||||||
.manualOverwrite(new ManualChangeOverwrite(entityType, importedRedaction.getManualOverwriteSection()))
|
.engines(Set.of(Engine.IMPORTED));
|
||||||
.engines(Set.of(Engine.IMPORTED))
|
|
||||||
.build();
|
if (importedRedaction.getManualOverwriteSection() != null && !importedRedaction.getManualOverwriteSection().isEmpty()) {
|
||||||
|
precursorEntityBuilder.section(importedRedaction.getManualOverwriteSection())
|
||||||
|
.manualOverwrite(new ManualChangeOverwrite(entityType, importedRedaction.getManualOverwriteSection()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return precursorEntityBuilder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -288,8 +288,8 @@ public class DocumentTree {
|
|||||||
if (treeId.isEmpty()) {
|
if (treeId.isEmpty()) {
|
||||||
return root;
|
return root;
|
||||||
}
|
}
|
||||||
Entry entry = root.children.get(treeId.get(0));
|
Entry entry = root;
|
||||||
for (int id : treeId.subList(1, treeId.size())) {
|
for (int id : treeId) {
|
||||||
entry = entry.children.get(id);
|
entry = entry.children.get(id);
|
||||||
}
|
}
|
||||||
return entry;
|
return entry;
|
||||||
|
|||||||
@ -3,8 +3,10 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
|||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||||
|
|
||||||
@ -35,7 +37,7 @@ public class Page {
|
|||||||
Integer width;
|
Integer width;
|
||||||
Integer rotation;
|
Integer rotation;
|
||||||
|
|
||||||
List<SemanticNode> mainBody;
|
List<AtomicTextBlock> textBlocksOnPage;
|
||||||
Header header;
|
Header header;
|
||||||
Footer footer;
|
Footer footer;
|
||||||
|
|
||||||
@ -53,13 +55,63 @@ public class Page {
|
|||||||
*/
|
*/
|
||||||
public TextBlock getMainBodyTextBlock() {
|
public TextBlock getMainBodyTextBlock() {
|
||||||
|
|
||||||
return mainBody.stream()
|
return textBlocksOnPage.stream()
|
||||||
.filter(SemanticNode::isLeaf)
|
.filter(atb -> !atb.isEmpty())
|
||||||
.map(SemanticNode::getTextBlock)
|
|
||||||
.collect(new TextBlockCollector());
|
.collect(new TextBlockCollector());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the highest SemanticNodes, which appear only on this page. It is achieved by traversing the DocumentTree up, until a SemanticNode's direct parent is no longer exclusively on this page.
|
||||||
|
*
|
||||||
|
* @return A list which contains the highest SemanticNodes, which appear only on this page.
|
||||||
|
*/
|
||||||
|
public List<SemanticNode> getMainBody() {
|
||||||
|
|
||||||
|
return textBlocksOnPage.stream()
|
||||||
|
.map(AtomicTextBlock::getParent)
|
||||||
|
.map(this::getHighestParentOnlyOnPage)
|
||||||
|
.distinct()
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the highest SemanticNodes which are present on the page. There might be multiple, as two or more Main Sections may start on a page.
|
||||||
|
* This is achieved by traversing up the document tree and returning all SemanticNodes whose direct parent is the Document
|
||||||
|
*
|
||||||
|
* @return A stream of the highest SemanticNodes present on this page
|
||||||
|
*/
|
||||||
|
public Stream<SemanticNode> streamHighestSemanticNodesOnPage() {
|
||||||
|
|
||||||
|
return textBlocksOnPage.stream()
|
||||||
|
.map(AtomicTextBlock::getParent)
|
||||||
|
.map(this::getHighestSemanticNodeOnPage)
|
||||||
|
.distinct();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private SemanticNode getHighestParentOnlyOnPage(SemanticNode node) {
|
||||||
|
|
||||||
|
SemanticNode currentNode = node;
|
||||||
|
while (currentNode.hasParent() && currentNode.getParent().onlyOnPage(this)) {
|
||||||
|
currentNode = currentNode.getParent();
|
||||||
|
}
|
||||||
|
return currentNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private SemanticNode getHighestSemanticNodeOnPage(SemanticNode node) {
|
||||||
|
|
||||||
|
SemanticNode currentNode = node;
|
||||||
|
while (currentNode.hasParent() //
|
||||||
|
&& !currentNode.getParent().getType().equals(NodeType.DOCUMENT)) {
|
||||||
|
currentNode = currentNode.getParent();
|
||||||
|
}
|
||||||
|
return currentNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||||
@ -33,7 +35,6 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
|
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
|
||||||
public class Section extends AbstractSemanticNode {
|
public class Section extends AbstractSemanticNode {
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public NodeType getType() {
|
public NodeType getType() {
|
||||||
|
|
||||||
@ -60,7 +61,6 @@ public class Section extends AbstractSemanticNode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
@ -85,7 +85,14 @@ public class Section extends AbstractSemanticNode {
|
|||||||
*/
|
*/
|
||||||
public boolean anyHeadlineContainsString(String value) {
|
public boolean anyHeadlineContainsString(String value) {
|
||||||
|
|
||||||
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value));
|
boolean found = streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value)) || getHeadline().containsString(value);
|
||||||
|
if (!found) {
|
||||||
|
List<Headline> previousHeadlines = new ArrayList<>();
|
||||||
|
headlinesByPreviousSibling(this, previousHeadlines);
|
||||||
|
return previousHeadlines.stream()
|
||||||
|
.anyMatch(headline -> headline.containsString(value));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -97,8 +104,37 @@ public class Section extends AbstractSemanticNode {
|
|||||||
*/
|
*/
|
||||||
public boolean anyHeadlineContainsStringIgnoreCase(String value) {
|
public boolean anyHeadlineContainsStringIgnoreCase(String value) {
|
||||||
|
|
||||||
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
|
boolean found = streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value)) || getHeadline().containsStringIgnoreCase(value);
|
||||||
|
if (!found) {
|
||||||
|
List<Headline> previousHeadlines = new ArrayList<>();
|
||||||
|
headlinesByPreviousSibling(this, previousHeadlines);
|
||||||
|
return previousHeadlines.stream()
|
||||||
|
.anyMatch(headline -> headline.containsStringIgnoreCase(value));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void headlinesByPreviousSibling(SemanticNode section, List<Headline> found) {
|
||||||
|
|
||||||
|
if (section.getPreviousSibling()
|
||||||
|
.isPresent() && section.getPreviousSibling()
|
||||||
|
.get() instanceof Section previousSection) {
|
||||||
|
|
||||||
|
var subnodes = previousSection.streamAllSubNodes()
|
||||||
|
.toList();
|
||||||
|
if (subnodes.size() == 1 && subnodes.get(0) instanceof Headline previousHeadline) {
|
||||||
|
found.add(previousHeadline);
|
||||||
|
headlinesByPreviousSibling(previousSection, found);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (section.getPreviousSibling()
|
||||||
|
.isPresent() && section.getPreviousSibling()
|
||||||
|
.get() instanceof Headline previousHeadline) {
|
||||||
|
found.add(previousHeadline);
|
||||||
|
headlinesByPreviousSibling(previousHeadline, found);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -808,4 +808,17 @@ public interface SemanticNode {
|
|||||||
streamChildren().forEach(childNode -> childNode.accept(visitor));
|
streamChildren().forEach(childNode -> childNode.accept(visitor));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether this SemanticNode appears on a single page only, and if that page is the provided one.
|
||||||
|
*
|
||||||
|
* @param page the page to check
|
||||||
|
* @return true, when SemanticNode is on a single page only and the page is the provided page. Otherwise, false.
|
||||||
|
*/
|
||||||
|
default boolean onlyOnPage(Page page) {
|
||||||
|
|
||||||
|
Set<Page> pages = getPages();
|
||||||
|
return pages.size() == 1 && pages.contains(page);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -110,7 +110,14 @@ public class RedactionMessageReceiver {
|
|||||||
log.info("-------------------------------------------------------------------------------------------------");
|
log.info("-------------------------------------------------------------------------------------------------");
|
||||||
shouldRespond = false;
|
shouldRespond = false;
|
||||||
break;
|
break;
|
||||||
|
case IMPORTED_REDACTIONS_ONLY:
|
||||||
|
log.info("------------------------------Imported Redactions Analysis Only------------------------------------------");
|
||||||
|
log.info("Starting Imported Redactions Analysis Only for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||||
|
log.debug(analyzeRequest.getManualRedactions().toString());
|
||||||
|
result = analyzeService.analyzeImportedRedactionsOnly(analyzeRequest);
|
||||||
|
log.info("Successful Imported Redactions Analysis Only dossier {} file {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||||
|
log.info("-------------------------------------------------------------------------------------------------");
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
|
throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
|
||||||
}
|
}
|
||||||
|
|||||||
@ -23,8 +23,10 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileTyp
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedLegalBases;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.mapper.ImportedLegalBasisMapper;
|
||||||
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
|
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
|
||||||
@ -76,6 +78,7 @@ public class AnalyzeService {
|
|||||||
ImportedRedactionEntryService importedRedactionEntryService;
|
ImportedRedactionEntryService importedRedactionEntryService;
|
||||||
ObservedStorageService observedStorageService;
|
ObservedStorageService observedStorageService;
|
||||||
FunctionTimerValues redactmanagerAnalyzePagewiseValues;
|
FunctionTimerValues redactmanagerAnalyzePagewiseValues;
|
||||||
|
ImportedLegalBasisMapper importedLegalBasisMapper = ImportedLegalBasisMapper.INSTANCE;
|
||||||
|
|
||||||
|
|
||||||
@Timed("redactmanager_reanalyze")
|
@Timed("redactmanager_reanalyze")
|
||||||
@ -128,7 +131,7 @@ public class AnalyzeService {
|
|||||||
document,
|
document,
|
||||||
document.getNumberOfPages(),
|
document.getNumberOfPages(),
|
||||||
true,
|
true,
|
||||||
Collections.emptySet());
|
new HashSet<>());
|
||||||
}
|
}
|
||||||
|
|
||||||
KieWrapper kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
|
KieWrapper kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
|
||||||
@ -245,6 +248,39 @@ public class AnalyzeService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Builds an entity log for a file from its imported redactions only.
 * <p>
 * Steps, in order: update the dictionary for the request's dossier template and dossier, load the stored
 * document data and map it to a {@link Document} graph, load the imported redactions and imported legal bases,
 * add the imported entries to the document (collecting those that could not be found), create the initial
 * entity log, overwrite its legal basis list with the mapped imported legal bases, process the not-found
 * entries and finally delegate to {@code finalizeAnalysis}.
 * <p>
 * No rule container is loaded here; {@code KieWrapper.empty()} is handed to {@code finalizeAnalysis}.
 *
 * @param analyzeRequest request supplying the dossier template id, dossier id and file id used for all lookups
 * @return the result produced by {@code finalizeAnalysis}
 */
@Timed("redactmanager_analyzeImportedRedactionsOnly")
|
||||||
|
@Observed(name = "AnalyzeService", contextualName = "analyzeImportedRedactionsOnly")
|
||||||
|
public AnalyzeResult analyzeImportedRedactionsOnly(AnalyzeRequest analyzeRequest) {
|
||||||
|
|
||||||
|
// Captured first so the value passed to finalizeAnalysis covers the whole method.
long startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||||
|
|
||||||
|
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||||
|
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||||
|
|
||||||
|
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||||
|
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||||
|
|
||||||
|
// NOTE(review): the result is dereferenced below without a null check — confirm that
// getImportedLegalBases never returns null when the storage object does not exist.
ImportedLegalBases importedLegalBases = redactionStorageService.getImportedLegalBases(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||||
|
log.info("Loaded Imported Legal Bases for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||||
|
|
||||||
|
var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
|
||||||
|
|
||||||
|
// A zeroed DictionaryVersion and 0 are passed as the last two arguments.
// NOTE(review): presumably placeholders because no dictionary/rule analysis runs in this path — confirm.
EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest, document, notFoundImportedEntries, new DictionaryVersion(0, 0), 0);
|
||||||
|
|
||||||
|
// Replace the entity log's legal basis list with the imported legal bases, mapped one by one.
entityLogChanges.getEntityLog()
|
||||||
|
.setLegalBasis(importedLegalBases.getImportedLegalBases()
|
||||||
|
.stream()
|
||||||
|
.map(importedLegalBasisMapper::toEntityLogLegalBasis)
|
||||||
|
.toList());
|
||||||
|
|
||||||
|
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
|
||||||
|
|
||||||
|
// Empty KieWrapper, 'false' flag and an empty mutable set are handed to the shared finalization step.
return finalizeAnalysis(analyzeRequest, startTime, KieWrapper.empty(), entityLogChanges, document, document.getNumberOfPages(), false, new HashSet<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
|
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
|
||||||
long startTime,
|
long startTime,
|
||||||
KieWrapper kieWrapperComponentRules,
|
KieWrapper kieWrapperComponentRules,
|
||||||
@ -367,7 +403,7 @@ public class AnalyzeService {
|
|||||||
|
|
||||||
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
|
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
|
||||||
.stream() //
|
.stream() //
|
||||||
.filter(entry -> sectionsToReanalyseIds.contains(entry.getKey())) //
|
.filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
|
||||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
|
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,4 +419,11 @@ public class AnalyzeService {
|
|||||||
return nerEntities;
|
return nerEntities;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns the first component of the tree id that {@code NerEntitiesAdapter.sectionNumberToTreeId} derives
 * from the given section string.
 * <p>
 * NOTE(review): the first list element is taken unconditionally, so any exception thrown by the adapter's
 * parsing of the section string propagates to the caller — confirm section keys are always well-formed.
 *
 * @param section section identifier as used for the keys of the NER entities map
 * @return the first element of the parsed tree id
 */
private static Integer getSuperSectionID(String section) {
|
||||||
|
|
||||||
|
return NerEntitiesAdapter.sectionNumberToTreeId(section)
|
||||||
|
.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -223,7 +223,6 @@ public class EntityLogCreatorService {
|
|||||||
|
|
||||||
String type = precursorEntity.getManualOverwrite().getType()
|
String type = precursorEntity.getManualOverwrite().getType()
|
||||||
.orElse(precursorEntity.getType());
|
.orElse(precursorEntity.getType());
|
||||||
boolean isHint = isHint(precursorEntity.getEntityType());
|
|
||||||
return EntityLogEntry.builder()
|
return EntityLogEntry.builder()
|
||||||
.id(precursorEntity.getId())
|
.id(precursorEntity.getId())
|
||||||
.reason(precursorEntity.buildReasonWithManualChangeDescriptions())
|
.reason(precursorEntity.buildReasonWithManualChangeDescriptions())
|
||||||
|
|||||||
@ -40,7 +40,8 @@ public class ComponentMappingFileSystemCache {
|
|||||||
public File getComponentMappingFile(ComponentMappingMetadata metadata) {
|
public File getComponentMappingFile(ComponentMappingMetadata metadata) {
|
||||||
|
|
||||||
Path mappingFile = getMappingFileFromMetadata(metadata);
|
Path mappingFile = getMappingFileFromMetadata(metadata);
|
||||||
Path mappingFileMetaDataFile = mappingFile.resolveSibling(metadata.getName() + METADATA_SUFFIX);
|
Path mappingFileMetaDataFile = getMappingMetadataFileFromMetadata(metadata);
|
||||||
|
|
||||||
synchronized (ComponentMappingFileSystemCache.class) {
|
synchronized (ComponentMappingFileSystemCache.class) {
|
||||||
|
|
||||||
if (fileExistsAndUpToDate(metadata, mappingFile, mappingFileMetaDataFile)) {
|
if (fileExistsAndUpToDate(metadata, mappingFile, mappingFileMetaDataFile)) {
|
||||||
@ -87,6 +88,13 @@ public class ComponentMappingFileSystemCache {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Resolves the path of the metadata file that belongs to a component mapping.
 * <p>
 * The path is built as {@code mappingFileDir/<tenant id>/<storage id> + METADATA_SUFFIX}, with the tenant id
 * taken from {@code TenantContext}.
 *
 * @param metadata mapping metadata whose storage id is used as the file name stem
 * @return the resolved metadata file path; this method does not check whether the file exists
 */
private Path getMappingMetadataFileFromMetadata(ComponentMappingMetadata metadata) {
|
||||||
|
|
||||||
|
// Per-tenant sub-directory below the mapping file directory.
Path tenantStem = mappingFileDir.resolve(TenantContext.getTenantId());
|
||||||
|
return tenantStem.resolve(metadata.getStorageId() + METADATA_SUFFIX);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private boolean fileExistsAndUpToDate(ComponentMappingMetadata metadata, Path mappingFile, Path mappingFileMetaDataFile) {
|
private boolean fileExistsAndUpToDate(ComponentMappingMetadata metadata, Path mappingFile, Path mappingFileMetaDataFile) {
|
||||||
|
|
||||||
if (mappingFile.toFile().exists() && mappingFile.toFile().isFile() && mappingFileMetaDataFile.toFile().exists() && mappingFileMetaDataFile.toFile().isFile()) {
|
if (mappingFile.toFile().exists() && mappingFile.toFile().isFile() && mappingFileMetaDataFile.toFile().exists() && mappingFileMetaDataFile.toFile().isFile()) {
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import java.io.File;
|
|||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
import java.time.Duration;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
@ -36,7 +37,7 @@ public class ComponentMappingMemoryCache {
|
|||||||
public ComponentMappingMemoryCache(ComponentMappingFileSystemCache componentMappingFileSystemCache) {
|
public ComponentMappingMemoryCache(ComponentMappingFileSystemCache componentMappingFileSystemCache) {
|
||||||
|
|
||||||
this.fileSystemCache = componentMappingFileSystemCache;
|
this.fileSystemCache = componentMappingFileSystemCache;
|
||||||
cache = CacheBuilder.newBuilder().maximumWeight(MAX_NUMBER_OF_LINES).weigher(COMPONENT_MAPPING_WEIGHER).build();
|
cache = CacheBuilder.newBuilder().maximumWeight(MAX_NUMBER_OF_LINES).weigher(COMPONENT_MAPPING_WEIGHER).expireAfterAccess(Duration.ofDays(1)).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.service.document;
|
package com.iqser.red.service.redaction.v1.server.service.document;
|
||||||
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
@ -64,7 +63,7 @@ public class DocumentGraphMapper {
|
|||||||
for (DocumentStructure.EntryData entryData : entries) {
|
for (DocumentStructure.EntryData entryData : entries) {
|
||||||
|
|
||||||
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
|
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
|
||||||
.map(pageNumber -> getPage(pageNumber, context))
|
.map(context::getPage)
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
SemanticNode node = switch (entryData.getType()) {
|
SemanticNode node = switch (entryData.getType()) {
|
||||||
@ -83,6 +82,15 @@ public class DocumentGraphMapper {
|
|||||||
if (entryData.getAtomicBlockIds().length > 0) {
|
if (entryData.getAtomicBlockIds().length > 0) {
|
||||||
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
|
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
|
||||||
node.setLeafTextBlock(textBlock);
|
node.setLeafTextBlock(textBlock);
|
||||||
|
|
||||||
|
switch (entryData.getType()) {
|
||||||
|
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
|
||||||
|
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
|
||||||
|
case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
|
||||||
|
default -> textBlock.getAtomicTextBlocks()
|
||||||
|
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
|
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
|
||||||
.toList();
|
.toList();
|
||||||
@ -94,13 +102,8 @@ public class DocumentGraphMapper {
|
|||||||
}
|
}
|
||||||
node.setTreeId(treeId);
|
node.setTreeId(treeId);
|
||||||
|
|
||||||
switch (entryData.getType()) {
|
|
||||||
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
|
|
||||||
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
|
|
||||||
default -> pages.forEach(page -> page.getMainBody().add(node));
|
|
||||||
}
|
|
||||||
|
|
||||||
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildren(), context)).node(node).build());
|
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildren(), context)).node(node).build());
|
||||||
|
|
||||||
}
|
}
|
||||||
return newEntries;
|
return newEntries;
|
||||||
}
|
}
|
||||||
@ -115,7 +118,7 @@ public class DocumentGraphMapper {
|
|||||||
private Image buildImage(Context context, Map<String, String> properties, Long[] pageNumbers) {
|
private Image buildImage(Context context, Map<String, String> properties, Long[] pageNumbers) {
|
||||||
|
|
||||||
assert pageNumbers.length == 1;
|
assert pageNumbers.length == 1;
|
||||||
Page page = getPage(pageNumbers[0], context);
|
Page page = context.getPage(pageNumbers[0]);
|
||||||
var builder = Image.builder();
|
var builder = Image.builder();
|
||||||
PropertiesMapper.parseImageProperties(properties, builder);
|
PropertiesMapper.parseImageProperties(properties, builder);
|
||||||
return builder.documentTree(context.documentTree).page(page).build();
|
return builder.documentTree(context.documentTree).page(page).build();
|
||||||
@ -161,6 +164,7 @@ public class DocumentGraphMapper {
|
|||||||
return SuperSection.builder().documentTree(context.documentTree).build();
|
return SuperSection.builder().documentTree(context.documentTree).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Paragraph buildParagraph(Context context, Map<String, String> properties) {
|
private Paragraph buildParagraph(Context context, Map<String, String> properties) {
|
||||||
|
|
||||||
if (PropertiesMapper.isDuplicateParagraph(properties)) {
|
if (PropertiesMapper.isDuplicateParagraph(properties)) {
|
||||||
@ -189,21 +193,13 @@ public class DocumentGraphMapper {
|
|||||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
|
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
|
||||||
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
|
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
|
||||||
parent,
|
parent,
|
||||||
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
context.getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Page buildPage(DocumentPage p) {
|
private Page buildPage(DocumentPage p) {
|
||||||
|
|
||||||
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
|
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).textBlocksOnPage(new LinkedList<>()).build();
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private Page getPage(Long pageIndex, Context context) {
|
|
||||||
|
|
||||||
Page page = context.pageData.get(Math.toIntExact(pageIndex) - 1);
|
|
||||||
assert page.getNumber() == Math.toIntExact(pageIndex);
|
|
||||||
return page;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -226,6 +222,14 @@ public class DocumentGraphMapper {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Looks up a page by its page number.
 * <p>
 * The number is treated as 1-based: the page is read from {@code pageData} at index {@code pageIndex - 1}.
 *
 * @param pageIndex 1-based page number; converted with {@code Math.toIntExact}, which throws
 *                  {@code ArithmeticException} if the value does not fit into an int
 * @return the page stored at position {@code pageIndex - 1} of {@code pageData}
 */
private Page getPage(Long pageIndex) {
|
||||||
|
|
||||||
|
Page page = pageData.get(Math.toIntExact(pageIndex) - 1);
|
||||||
|
// Sanity check that list position and page number agree; only evaluated when JVM assertions are enabled.
assert page.getNumber() == Math.toIntExact(pageIndex);
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -614,21 +614,21 @@ public class EntityCreationService {
|
|||||||
/**
|
/**
|
||||||
* Looks across the remaining table row to the right of the provided TableCell if any line intersects the y coordinates of the found text.
|
* Looks across the remaining table row to the right of the provided TableCell if any line intersects the y coordinates of the found text.
|
||||||
*
|
*
|
||||||
* @param TextRanges a list of textRanges
|
* @param textRanges a list of textRanges
|
||||||
* @param tableCell the table cell
|
* @param tableCell the table cell
|
||||||
* @param type the type
|
* @param type the type
|
||||||
* @param entityType the entity type
|
* @param entityType the entity type
|
||||||
* @param tableNode the table node
|
* @param tableNode the table node
|
||||||
* @return a stream of RedactionEntities
|
* @return a stream of RedactionEntities
|
||||||
*/
|
*/
|
||||||
private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> TextRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
|
private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> textRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
|
||||||
|
|
||||||
return TextRanges.stream()
|
return textRanges.stream()
|
||||||
.map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary)))
|
.map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary)))
|
||||||
.map(bBox -> Pair.of(bBox.getMaxY(), bBox.getMinY()))
|
.map(bBox -> Pair.of(bBox.getMaxY(), bBox.getMinY()))
|
||||||
.map(maxMinPair -> tableNode.streamRow(tableCell.getRow())
|
.map(maxMinPair -> tableNode.streamRow(tableCell.getRow())
|
||||||
.filter(nextTableCell -> nextTableCell.getCol() > tableCell.getCol())
|
.filter(nextTableCell -> nextTableCell.getCol() > tableCell.getCol())
|
||||||
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesInYRange(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
|
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
|
||||||
.map(b -> b.trim(tableNode.getTextBlock()))
|
.map(b -> b.trim(tableNode.getTextBlock()))
|
||||||
.filter(boundary -> isValidEntityTextRange(tableNode.getTextBlock(), boundary))
|
.filter(boundary -> isValidEntityTextRange(tableNode.getTextBlock(), boundary))
|
||||||
.map(boundary -> byTextRange(boundary, type, entityType, tableNode))
|
.map(boundary -> byTextRange(boundary, type, entityType, tableNode))
|
||||||
@ -1160,6 +1160,10 @@ public class EntityCreationService {
|
|||||||
|
|
||||||
if (kieSession != null) {
|
if (kieSession != null) {
|
||||||
kieSession.insert(textEntity);
|
kieSession.insert(textEntity);
|
||||||
|
textEntity.getIntersectingNodes()
|
||||||
|
.stream()
|
||||||
|
.filter(nodesInKieSession::contains)
|
||||||
|
.forEach(o -> kieSession.update(kieSession.getFactHandle(o), o));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -11,10 +11,12 @@ import java.util.stream.Collectors;
|
|||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
@ -35,6 +37,7 @@ public class EntityFromPrecursorCreationService {
|
|||||||
static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities
|
static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities
|
||||||
EntityFindingUtility entityFindingUtility;
|
EntityFindingUtility entityFindingUtility;
|
||||||
DictionaryService dictionaryService;
|
DictionaryService dictionaryService;
|
||||||
|
RedactionServiceSettings settings;
|
||||||
|
|
||||||
|
|
||||||
public List<PrecursorEntity> createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions manualRedactions, SemanticNode node, String dossierTemplateId) {
|
public List<PrecursorEntity> createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions manualRedactions, SemanticNode node, String dossierTemplateId) {
|
||||||
@ -92,7 +95,7 @@ public class EntityFromPrecursorCreationService {
|
|||||||
notFoundEntities.add(precursorEntity);
|
notFoundEntities.add(precursorEntity);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
createCorrectEntity(precursorEntity, optionalClosestEntity.get());
|
createCorrectEntity(precursorEntity, optionalClosestEntity.get(), settings.isAnnotationMode());
|
||||||
}
|
}
|
||||||
|
|
||||||
tempEntitiesByValue.values()
|
tempEntitiesByValue.values()
|
||||||
@ -125,12 +128,16 @@ public class EntityFromPrecursorCreationService {
|
|||||||
precursorEntity.getEntityType(),
|
precursorEntity.getEntityType(),
|
||||||
closestEntity.getDeepestFullyContainingNode());
|
closestEntity.getDeepestFullyContainingNode());
|
||||||
} else {
|
} else {
|
||||||
correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(),
|
String section = precursorEntity.getManualOverwrite().getSection()
|
||||||
precursorEntity.type(),
|
.orElse(null);
|
||||||
precursorEntity.getEntityType(),
|
if ((section == null || section.isBlank())
|
||||||
precursorEntity.getId(),
|
&& precursorEntity.getSection() != null
|
||||||
precursorEntity.getManualOverwrite().getSection()
|
&& !precursorEntity.getSection().isBlank()
|
||||||
.orElse(null));
|
&& precursorEntity.getEngines().contains(Engine.IMPORTED)) {
|
||||||
|
section = precursorEntity.getSection();
|
||||||
|
}
|
||||||
|
|
||||||
|
correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId(), section);
|
||||||
}
|
}
|
||||||
correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
|
correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
|
||||||
correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes()));
|
correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes()));
|
||||||
|
|||||||
@ -177,7 +177,7 @@ public class NerEntitiesAdapter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static List<Integer> sectionNumberToTreeId(String sectionNumber) {
|
public static List<Integer> sectionNumberToTreeId(String sectionNumber) {
|
||||||
|
|
||||||
return Arrays.stream(sectionNumber.split("\\."))
|
return Arrays.stream(sectionNumber.split("\\."))
|
||||||
.map(Integer::parseInt)
|
.map(Integer::parseInt)
|
||||||
|
|||||||
@ -11,7 +11,6 @@ import java.util.stream.Stream;
|
|||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedaction;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedaction;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||||
@ -27,7 +26,6 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncr
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrementValue;
|
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrementValue;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
|
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
|
||||||
|
|
||||||
import io.micrometer.core.annotation.Timed;
|
import io.micrometer.core.annotation.Timed;
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
@ -51,7 +49,6 @@ public class SectionFinderService {
|
|||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||||
|
|
||||||
|
|
||||||
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues()
|
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues()
|
||||||
.stream()
|
.stream()
|
||||||
.map(DictionaryIncrementValue::getValue)
|
.map(DictionaryIncrementValue::getValue)
|
||||||
@ -82,9 +79,7 @@ public class SectionFinderService {
|
|||||||
return document.getPages()
|
return document.getPages()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(page -> relevantPagesForReanalysis.contains(page.getNumber()))
|
.filter(page -> relevantPagesForReanalysis.contains(page.getNumber()))
|
||||||
.flatMap(page -> Stream.concat(page.getMainBody()
|
.flatMap(page -> Stream.concat(page.streamHighestSemanticNodesOnPage(), Stream.of(page.getHeader(), page.getFooter())))
|
||||||
.stream()
|
|
||||||
.filter(node -> node.getType().equals(NodeType.SECTION)), Stream.of(page.getHeader(), page.getFooter())))
|
|
||||||
.map(node -> node.getTreeId()
|
.map(node -> node.getTreeId()
|
||||||
.get(0))
|
.get(0))
|
||||||
.toList();
|
.toList();
|
||||||
|
|||||||
@ -73,9 +73,10 @@ public class ComponentDroolsExecutionService {
|
|||||||
entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset()));
|
entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset()));
|
||||||
if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) {
|
if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) {
|
||||||
entry.getDuplicatedTextRanges()
|
entry.getDuplicatedTextRanges()
|
||||||
.forEach(duplicatedTextRange -> {
|
.forEach(duplicatedTextRange -> entities.add(Entity.fromEntityLogEntry(entry,
|
||||||
entities.add(Entity.fromEntityLogEntry(entry, document, duplicatedTextRange.getStart(), duplicatedTextRange.getEnd()));
|
document,
|
||||||
});
|
duplicatedTextRange.getStart(),
|
||||||
|
duplicatedTextRange.getEnd())));
|
||||||
}
|
}
|
||||||
return entities.stream();
|
return entities.stream();
|
||||||
})
|
})
|
||||||
@ -94,8 +95,7 @@ public class ComponentDroolsExecutionService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS)
|
completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
|
||||||
.get();
|
|
||||||
} catch (ExecutionException e) {
|
} catch (ExecutionException e) {
|
||||||
kieSession.dispose();
|
kieSession.dispose();
|
||||||
if (e.getCause() instanceof TimeoutException) {
|
if (e.getCause() instanceof TimeoutException) {
|
||||||
@ -105,6 +105,8 @@ public class ComponentDroolsExecutionService {
|
|||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
kieSession.dispose();
|
kieSession.dispose();
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
throw new DroolsTimeoutException(e, false, RuleFileType.COMPONENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
|
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
|
||||||
|
|||||||
@ -45,7 +45,7 @@ public class DroolsValidationService {
|
|||||||
private final KieContainerCreationService kieContainerCreationService;
|
private final KieContainerCreationService kieContainerCreationService;
|
||||||
private final DeprecatedElementsFinder deprecatedElementsFinder;
|
private final DeprecatedElementsFinder deprecatedElementsFinder;
|
||||||
private static final Pattern allowedImportsPattern = Pattern.compile(
|
private static final Pattern allowedImportsPattern = Pattern.compile(
|
||||||
"^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red|java\\.util)\\..*;$");
|
"^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red|java\\.util|java\\.text)\\..*;$");
|
||||||
public static final String LINEBREAK_MATCHER = "\\R";
|
public static final String LINEBREAK_MATCHER = "\\R";
|
||||||
|
|
||||||
|
|
||||||
@ -283,7 +283,9 @@ public class DroolsValidationService {
|
|||||||
|
|
||||||
private DroolsBlacklistErrorMessage checkAndGetBlackListedMessages(SearchImplementation blacklistedKeywordSearchImplementation, String stringToCheck, int lineIndexStart) {
|
private DroolsBlacklistErrorMessage checkAndGetBlackListedMessages(SearchImplementation blacklistedKeywordSearchImplementation, String stringToCheck, int lineIndexStart) {
|
||||||
|
|
||||||
String sanitizedRuleText = StringUtils.deleteWhitespace(stringToCheck);
|
String nonWhitespaceRuleText = StringUtils.deleteWhitespace(stringToCheck);
|
||||||
|
String sanitizedRuleText= nonWhitespaceRuleText.replaceAll("\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'" ,"");
|
||||||
|
|
||||||
List<SearchImplementation.MatchPosition> matches = blacklistedKeywordSearchImplementation.getMatches(sanitizedRuleText);
|
List<SearchImplementation.MatchPosition> matches = blacklistedKeywordSearchImplementation.getMatches(sanitizedRuleText);
|
||||||
|
|
||||||
if (!matches.isEmpty()) {
|
if (!matches.isEmpty()) {
|
||||||
|
|||||||
@ -129,8 +129,7 @@ public class EntityDroolsExecutionService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS)
|
completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
|
||||||
.get();
|
|
||||||
} catch (ExecutionException e) {
|
} catch (ExecutionException e) {
|
||||||
kieSession.dispose();
|
kieSession.dispose();
|
||||||
if (e.getCause() instanceof TimeoutException) {
|
if (e.getCause() instanceof TimeoutException) {
|
||||||
@ -140,6 +139,8 @@ public class EntityDroolsExecutionService {
|
|||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
kieSession.dispose();
|
kieSession.dispose();
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
throw new DroolsTimeoutException(e, false, RuleFileType.ENTITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
|
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
|
||||||
|
|||||||
@ -32,7 +32,7 @@ import lombok.experimental.UtilityClass;
|
|||||||
public class RuleFileParser {
|
public class RuleFileParser {
|
||||||
|
|
||||||
private final static Pattern ruleIdentifierInCodeFinder = Pattern.compile(
|
private final static Pattern ruleIdentifierInCodeFinder = Pattern.compile(
|
||||||
"\\b(?:redact|apply|skip|remove|ignore|applyWithLineBreaks|applyWithReferences|skipWithReferences)\\s*\\(\"([a-zA-Z0-9]+.\\d+.\\d+)\",.*(?:, .*)?\\)");
|
"\\b(?:redact|apply|skip|remove|ignore|applyWithLineBreaks|applyWithReferences|skipWithReferences)\\s*\\(\\s*\"([a-zA-Z0-9]+.\\d+.\\d+)\"\\s*,\\s*.*(?:\\s*,\\s*.*)\\s*?\\)");
|
||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@ -78,7 +78,8 @@ public class RuleFileParser {
|
|||||||
.map(GlobalDescr::getLine)
|
.map(GlobalDescr::getLine)
|
||||||
.orElse(0),
|
.orElse(0),
|
||||||
allQueries,
|
allQueries,
|
||||||
ruleClasses, customDroolsValidation);
|
ruleClasses,
|
||||||
|
customDroolsValidation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -9,10 +9,12 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.springframework.cache.annotation.Cacheable;
|
import org.springframework.cache.annotation.Cacheable;
|
||||||
|
import org.springframework.context.annotation.Import;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedLegalBases;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactionsPerPage;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactionsPerPage;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||||
@ -147,6 +149,21 @@ public class RedactionStorageService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Timed("redactmanager_getImportedLegalBases")
|
||||||
|
public ImportedLegalBases getImportedLegalBases(String dossierId, String fileId) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
return storageService.readJSONObject(TenantContext.getTenantId(),
|
||||||
|
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_LEGAL_BASES),
|
||||||
|
ImportedLegalBases.class);
|
||||||
|
} catch (StorageObjectDoesNotExist e) {
|
||||||
|
log.debug("Imported legal bases not available.");
|
||||||
|
return new ImportedLegalBases();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Deprecated(forRemoval = true)
|
@Deprecated(forRemoval = true)
|
||||||
@Timed("redactmanager_getRedactionLog")
|
@Timed("redactmanager_getRedactionLog")
|
||||||
public RedactionLog getRedactionLog(String dossierId, String fileId) {
|
public RedactionLog getRedactionLog(String dossierId, String fileId) {
|
||||||
|
|||||||
@ -18,7 +18,7 @@ public class MigratedIdsCollector implements Collector<MigrationEntity, Migrated
|
|||||||
@Override
|
@Override
|
||||||
public Supplier<MigratedIds> supplier() {
|
public Supplier<MigratedIds> supplier() {
|
||||||
|
|
||||||
return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList());
|
return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList(), Collections.emptyList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -3,8 +3,10 @@ package com.iqser.red.service.redaction.v1.server.utils;
|
|||||||
import static java.lang.String.format;
|
import static java.lang.String.format;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.awt.geom.RectangularShape;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.function.Predicate;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
@ -154,14 +156,8 @@ public class RedactionSearchUtility {
|
|||||||
*/
|
*/
|
||||||
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
|
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
|
||||||
|
|
||||||
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
Predicate<TextRange> isWithinYRange = lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary);
|
||||||
.map(textBlock::getLineTextRange)
|
return filterLineBoundaries(textBlock, isWithinYRange);
|
||||||
.filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary))
|
|
||||||
.toList();
|
|
||||||
if (lineBoundaries.isEmpty()) {
|
|
||||||
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
|
|
||||||
}
|
|
||||||
return TextRange.merge(lineBoundaries);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -172,6 +168,49 @@ public class RedactionSearchUtility {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Identifies all lines within a text block that have roughly the same vertical coordinates.
|
||||||
|
*
|
||||||
|
* @param maxY The maximum Y-coordinate of the vertical range.
|
||||||
|
* @param minY The minimum Y-coordinate of the vertical range.
|
||||||
|
* @param textBlock The text block containing the lines to be checked.
|
||||||
|
* @return A {@link TextRange} encompassing all lines within the specified Y-coordinate range.
|
||||||
|
*/
|
||||||
|
public static TextRange findTextRangesOfAllLinesWithCloseYCoordinates(Double maxY, Double minY, TextBlock textBlock) {
|
||||||
|
|
||||||
|
double averageLineHeight = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
||||||
|
.map(textBlock::getLineTextRange)
|
||||||
|
.flatMap((TextRange stringTextRange) -> textBlock.getPositions(stringTextRange)
|
||||||
|
.stream())
|
||||||
|
.map(RectangularShape::getHeight)
|
||||||
|
.mapToDouble(Double::doubleValue).average()
|
||||||
|
.orElse(0);
|
||||||
|
Predicate<TextRange> hasCloseYRange = lineBoundary -> areYCoordinatesClose(maxY, minY, textBlock, lineBoundary, averageLineHeight);
|
||||||
|
|
||||||
|
return filterLineBoundaries(textBlock, hasCloseYRange);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static boolean areYCoordinatesClose(Double maxY, Double minY, TextBlock textBlock, TextRange lineTextRange, double averageLineHeight) {
|
||||||
|
|
||||||
|
Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange));
|
||||||
|
return Math.abs(lineBBox.getMinY() - minY) <= averageLineHeight && Math.abs(maxY - lineBBox.getMaxY()) <= averageLineHeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static TextRange filterLineBoundaries(TextBlock textBlock, Predicate<TextRange> textRangePredicate) {
|
||||||
|
|
||||||
|
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
||||||
|
.map(textBlock::getLineTextRange)
|
||||||
|
.filter(textRangePredicate)
|
||||||
|
.toList();
|
||||||
|
if (lineBoundaries.isEmpty()) {
|
||||||
|
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
|
||||||
|
}
|
||||||
|
return TextRange.merge(lineBoundaries);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds TextRanges matching a regex pattern within a TextBlock.
|
* Finds TextRanges matching a regex pattern within a TextBlock.
|
||||||
*
|
*
|
||||||
|
|||||||
@ -16,6 +16,9 @@ project.version: 1.0-SNAPSHOT
|
|||||||
server:
|
server:
|
||||||
port: 8080
|
port: 8080
|
||||||
|
|
||||||
|
lifecycle:
|
||||||
|
base-package: com.iqser.red.service.redaction
|
||||||
|
|
||||||
spring:
|
spring:
|
||||||
application:
|
application:
|
||||||
name: redaction-service
|
name: redaction-service
|
||||||
|
|||||||
@ -14,6 +14,8 @@ dd MMMM yyyy
|
|||||||
d MMMM yyyy
|
d MMMM yyyy
|
||||||
MMMM dd, yyyy
|
MMMM dd, yyyy
|
||||||
MMMM d, yyyy
|
MMMM d, yyyy
|
||||||
|
MMMM, d yyyy
|
||||||
|
MMMM d,yyyy
|
||||||
dd.MM.yyyy
|
dd.MM.yyyy
|
||||||
d.MM.yyyy
|
d.MM.yyyy
|
||||||
yyyy/MM/dd
|
yyyy/MM/dd
|
||||||
@ -28,6 +30,7 @@ dd['.'] MMM yyyy
|
|||||||
d['.'] MMM yyyy
|
d['.'] MMM yyyy
|
||||||
dd['th']['st']['nd']['rd'] 'of' MMMM, yyyy
|
dd['th']['st']['nd']['rd'] 'of' MMMM, yyyy
|
||||||
d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
|
d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
|
||||||
|
d['st']['nd']['rd']['th'] MMMM yyyy
|
||||||
MMMM dd['th']['st']['nd']['rd'], yyyy
|
MMMM dd['th']['st']['nd']['rd'], yyyy
|
||||||
MMMM d['th']['st']['nd']['rd'], yyyy
|
MMMM d['th']['st']['nd']['rd'], yyyy
|
||||||
yyyy, MMMM dd
|
yyyy, MMMM dd
|
||||||
@ -72,3 +75,5 @@ dd.MM.yy
|
|||||||
d.MM.yy
|
d.MM.yy
|
||||||
dd MMM. yyyy
|
dd MMM. yyyy
|
||||||
d MMM. yyyy
|
d MMM. yyyy
|
||||||
|
d-MMMM-yyyy
|
||||||
|
dd-MMMM-yyyy
|
||||||
|
|||||||
@ -114,13 +114,14 @@ public abstract class AbstractRedactionIntegrationTest {
|
|||||||
public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID+ ":" + TEST_DOSSIER_ID;
|
public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID + ":" + TEST_DOSSIER_ID;
|
||||||
public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
|
public static final String DOSSIER_AUTHOR_TYPE_ID = AUTHOR_TYPE_ID + ":" + TEST_DOSSIER_ID;
|
||||||
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||||
|
|
||||||
@ -250,8 +251,10 @@ public abstract class AbstractRedactionIntegrationTest {
|
|||||||
true));
|
true));
|
||||||
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
|
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
|
||||||
true));
|
true));
|
||||||
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR,
|
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
|
||||||
true));
|
PUBLISHED_INFORMATION_INDICATOR,
|
||||||
|
true));
|
||||||
|
when(dictionaryClient.getDictionaryForType(DOSSIER_AUTHOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, true));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -350,6 +353,7 @@ public abstract class AbstractRedactionIntegrationTest {
|
|||||||
.collect(Collectors.toSet()));
|
.collect(Collectors.toSet()));
|
||||||
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
|
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
|
||||||
dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>());
|
dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>());
|
||||||
|
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
|
||||||
|
|
||||||
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
|
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
|
||||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
|
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
|
||||||
|
|||||||
@ -9,6 +9,7 @@ import static org.mockito.Mockito.when;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
@ -17,11 +18,13 @@ import java.util.HashMap;
|
|||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
@ -56,7 +59,6 @@ import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
|
|||||||
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
||||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
|
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
|
||||||
import com.iqser.red.storage.commons.service.StorageService;
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
|
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
|
||||||
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
|
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
|
||||||
@ -80,7 +82,25 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
* This way you can recreate what is happening on the stack almost exactly.
|
* This way you can recreate what is happening on the stack almost exactly.
|
||||||
*/ public class AnalysisEnd2EndTest {
|
*/ public class AnalysisEnd2EndTest {
|
||||||
|
|
||||||
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here
|
// These files will be uploaded if they are present in the folder
|
||||||
|
public static final Set<FileType> ENDINGS_TO_UPLOAD = Set.of(FileType.ORIGIN,
|
||||||
|
FileType.DOCUMENT_PAGES,
|
||||||
|
FileType.DOCUMENT_POSITION,
|
||||||
|
FileType.DOCUMENT_STRUCTURE,
|
||||||
|
FileType.DOCUMENT_TEXT,
|
||||||
|
FileType.IMAGE_INFO,
|
||||||
|
FileType.NER_ENTITIES,
|
||||||
|
FileType.TABLES,
|
||||||
|
FileType.IMPORTED_REDACTIONS);
|
||||||
|
|
||||||
|
// These files must be present in the folder or the test will skip the file
|
||||||
|
public static final Set<FileType> REQUIRED_FILES = Set.of(FileType.ORIGIN,
|
||||||
|
FileType.DOCUMENT_PAGES,
|
||||||
|
FileType.DOCUMENT_POSITION,
|
||||||
|
FileType.DOCUMENT_STRUCTURE,
|
||||||
|
FileType.DOCUMENT_TEXT);
|
||||||
|
|
||||||
|
Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/mainBodyFailed/DOSSIER_TEMPLATE"); // Add your dossier-template here
|
||||||
ObjectMapper mapper = ObjectMapperFactory.create();
|
ObjectMapper mapper = ObjectMapperFactory.create();
|
||||||
final String TENANT_ID = "tenant";
|
final String TENANT_ID = "tenant";
|
||||||
|
|
||||||
@ -121,7 +141,7 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void runAnalysisEnd2End() {
|
public void runAnalysisEnd2End() {
|
||||||
|
|
||||||
String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
String folder = "/home/kschuettler/Downloads/mainBodyFailed/728d0af4-f4c4-4bc9-acf8-7d2632b02962/"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
||||||
|
|
||||||
Path absoluteFolderPath;
|
Path absoluteFolderPath;
|
||||||
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
|
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
|
||||||
@ -133,11 +153,14 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
|
|
||||||
log.info("Starting end2end analyses for all distinct filenames in folder: {}", folder);
|
log.info("Starting end2end analyses for all distinct filenames in folder: {}", folder);
|
||||||
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
|
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
|
||||||
log.info("Found {} distinct fileIds", analyzeRequests.size());
|
log.info("Found {} distinct fileIds with all required files", analyzeRequests.size());
|
||||||
for (int i = 0; i < analyzeRequests.size(); i++) {
|
for (int i = 0; i < analyzeRequests.size(); i++) {
|
||||||
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
|
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
|
||||||
|
log.info("----------------------------------------------------------------------------------");
|
||||||
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
|
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
|
||||||
analyzeService.analyze(analyzeRequest);
|
analyzeService.analyze(analyzeRequest);
|
||||||
|
log.info("----------------------------------------------------------------------------------");
|
||||||
|
log.info("");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,22 +211,36 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
private List<AnalyzeRequest> prepareStorageForFolder(Path folder) {
|
private List<AnalyzeRequest> prepareStorageForFolder(Path folder) {
|
||||||
|
|
||||||
return Files.list(folder)
|
return findOriginFiles(folder).stream()
|
||||||
.map(this::parseFileId)
|
|
||||||
.distinct()
|
|
||||||
.map(fileId -> prepareStorageForFile(fileId, folder))
|
.map(fileId -> prepareStorageForFile(fileId, folder))
|
||||||
|
.filter(Optional::isPresent)
|
||||||
|
.map(Optional::get)
|
||||||
.toList();
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String parseFileId(Path path) {
|
private Set<String> findOriginFiles(Path folder) throws IOException {
|
||||||
|
|
||||||
return path.getFileName().toString().split("\\.")[0];
|
return Files.walk(folder)
|
||||||
|
.map(this::parseFileName)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private String parseFileName(Path path) {
|
||||||
|
|
||||||
|
String suffix = ".ORIGIN.pdf";
|
||||||
|
if (!path.getFileName().toString().endsWith(suffix)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return path.getFileName().toString().replace(suffix, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
private AnalyzeRequest prepareStorageForFile(String fileId, Path folder) {
|
private Optional<AnalyzeRequest> prepareStorageForFile(String fileName, Path folder) {
|
||||||
|
|
||||||
AnalyzeRequest request = new AnalyzeRequest();
|
AnalyzeRequest request = new AnalyzeRequest();
|
||||||
request.setDossierId(UUID.randomUUID().toString());
|
request.setDossierId(UUID.randomUUID().toString());
|
||||||
@ -211,53 +248,65 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
request.setDossierTemplateId(testDossierTemplate.id);
|
request.setDossierTemplateId(testDossierTemplate.id);
|
||||||
request.setAnalysisNumber(-1);
|
request.setAnalysisNumber(-1);
|
||||||
|
|
||||||
Path manualRedactionFile = folder.resolve(fileId + ".MANUAL_REDACTIONS.json");
|
Path manualRedactionFile = folder.resolve(fileName + ".MANUAL_REDACTIONS.json");
|
||||||
if (Files.exists(manualRedactionFile)) {
|
if (Files.exists(manualRedactionFile)) {
|
||||||
request.setManualRedactions(mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class));
|
request.setManualRedactions(parseManualRedactions(manualRedactionFile));
|
||||||
} else {
|
} else {
|
||||||
request.setManualRedactions(new ManualRedactions());
|
request.setManualRedactions(new ManualRedactions());
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<FileType> endingsToUpload = Set.of("ORIGIN",
|
Set<FileType> uploadedFileTypes = findFilesToUpload(fileName, folder, ENDINGS_TO_UPLOAD).map(filePath -> uploadFile(filePath, request))
|
||||||
"DOCUMENT_PAGES",
|
.map(FileToUpload::fileType)
|
||||||
"DOCUMENT_POSITION",
|
|
||||||
"DOCUMENT_STRUCTURE",
|
|
||||||
"DOCUMENT_TEXT",
|
|
||||||
"IMAGE_INFO",
|
|
||||||
"NER_ENTITIES",
|
|
||||||
"TABLES",
|
|
||||||
"IMPORTED_REDACTIONS")
|
|
||||||
.stream()
|
|
||||||
.map(FileType::valueOf)
|
|
||||||
.collect(Collectors.toSet());
|
|
||||||
|
|
||||||
Set<FileType> uploadedFileTypes = Files.walk(folder)
|
|
||||||
.filter(path -> path.toFile().isFile())
|
|
||||||
.filter(path -> parseFileTypeFromPath(path).map(endingsToUpload::contains)
|
|
||||||
.orElse(false))
|
|
||||||
.map(filePath -> uploadFile(filePath, request))
|
|
||||||
.filter(Optional::isPresent)
|
|
||||||
.map(Optional::get)
|
|
||||||
.collect(Collectors.toUnmodifiableSet());
|
.collect(Collectors.toUnmodifiableSet());
|
||||||
|
|
||||||
Set<FileType> missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes);
|
Set<FileType> missingFileTypes = Sets.difference(REQUIRED_FILES, uploadedFileTypes);
|
||||||
|
|
||||||
if (!missingFileTypes.isEmpty()) {
|
if (!missingFileTypes.isEmpty()) {
|
||||||
log.error("Folder {} is missing files of type {}",
|
log.error("Folder {} is missing files of type {}",
|
||||||
folder.toFile(),
|
folder.toFile(),
|
||||||
missingFileTypes.stream()
|
missingFileTypes.stream()
|
||||||
.map(Enum::toString)
|
.map(Enum::toString)
|
||||||
.collect(Collectors.joining(", ")));
|
.collect(Collectors.joining(", ")));
|
||||||
throw new NotFoundException("Not all required file types are present.");
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
return request;
|
return Optional.of(request);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static Optional<FileType> parseFileTypeFromPath(Path path) {
|
private static Stream<FileToUpload> findFilesToUpload(String fileName, Path folder, Set<FileType> endingsToUpload) throws IOException {
|
||||||
|
|
||||||
|
return Files.walk(folder)
|
||||||
|
.filter(path -> path.toFile().isFile())
|
||||||
|
.map(path -> parseFileTypeFromPath(path, fileName, endingsToUpload))
|
||||||
|
.filter(Optional::isPresent)
|
||||||
|
.map(Optional::get);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private ManualRedactions parseManualRedactions(Path manualRedactionFile) {
|
||||||
|
|
||||||
String fileType = path.getFileName().toString().split("\\.")[1];
|
|
||||||
try {
|
try {
|
||||||
return Optional.of(FileType.valueOf(fileType));
|
return mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("Could not parse manual redactions");
|
||||||
|
return new ManualRedactions();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static Optional<FileToUpload> parseFileTypeFromPath(Path path, String fileName, Set<FileType> endingsToUpload) {
|
||||||
|
|
||||||
|
if (!path.getFileName().toString().startsWith(fileName)) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
String fileTypeString = path.getFileName().toString().split("\\.")[1];
|
||||||
|
FileType fileType = FileType.valueOf(fileTypeString);
|
||||||
|
if (!endingsToUpload.contains(fileType)) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
return Optional.of(new FileToUpload(path, fileType));
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
@ -265,21 +314,26 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
private Optional<FileType> uploadFile(Path path, AnalyzeRequest request) {
|
private FileToUpload uploadFile(FileToUpload fileToUpload, AnalyzeRequest request) {
|
||||||
|
|
||||||
Optional<FileType> fileType = parseFileTypeFromPath(path);
|
if (fileToUpload.path().getFileName().toString().endsWith(".gz")) {
|
||||||
if (fileType.isEmpty()) {
|
try (var fis = new FileInputStream(fileToUpload.path().toFile()); var in = new GZIPInputStream(fis);) {
|
||||||
return Optional.empty();
|
storageService.storeObject(TENANT_ID,
|
||||||
|
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
|
||||||
|
in);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
try (var in = new FileInputStream(fileToUpload.path().toFile())) {
|
||||||
|
storageService.storeObject(TENANT_ID,
|
||||||
|
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
|
||||||
|
in);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis);) {
|
return fileToUpload;
|
||||||
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in);
|
|
||||||
|
|
||||||
}
|
|
||||||
return fileType;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private class TestDossierTemplate {
|
public class TestDossierTemplate {
|
||||||
|
|
||||||
String id;
|
String id;
|
||||||
Dictionary testDictionary;
|
Dictionary testDictionary;
|
||||||
@ -379,4 +433,8 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private record FileToUpload(Path path, FileType fileType) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -174,6 +174,16 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
|||||||
expectedDates.add("03/08/1992");
|
expectedDates.add("03/08/1992");
|
||||||
expectedDates.add("13/08/1992");
|
expectedDates.add("13/08/1992");
|
||||||
expectedDates.add("27/02/1992");
|
expectedDates.add("27/02/1992");
|
||||||
|
expectedDates.add("27/10/1989");
|
||||||
|
expectedDates.add("07/10/1989");
|
||||||
|
expectedDates.add("21/08/1998");
|
||||||
|
expectedDates.add("02/08/1998");
|
||||||
|
expectedDates.add("01/05/1988");
|
||||||
|
expectedDates.add("02/06/2003");
|
||||||
|
expectedDates.add("03/09/2005");
|
||||||
|
expectedDates.add("06/09/2005");
|
||||||
|
expectedDates.add("17/08/2005");
|
||||||
|
expectedDates.add("22/08/2035");
|
||||||
|
|
||||||
String dates = experimentalDates.getComponentValues()
|
String dates = experimentalDates.getComponentValues()
|
||||||
.get(0).getValue();
|
.get(0).getValue();
|
||||||
|
|||||||
@ -107,7 +107,7 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testSave() {
|
public void testSave() {
|
||||||
|
|
||||||
MigratedIds ids = new MigratedIds(new LinkedList<>(), null);
|
MigratedIds ids = new MigratedIds(new LinkedList<>(), null, null);
|
||||||
ids.addMapping("123", "321");
|
ids.addMapping("123", "321");
|
||||||
ids.addMapping("123", "321");
|
ids.addMapping("123", "321");
|
||||||
ids.addMapping("123", "321");
|
ids.addMapping("123", "321");
|
||||||
|
|||||||
@ -10,6 +10,7 @@ import java.time.OffsetDateTime;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.UUID;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
@ -25,24 +26,30 @@ import org.springframework.context.annotation.Configuration;
|
|||||||
import org.springframework.context.annotation.FilterType;
|
import org.springframework.context.annotation.FilterType;
|
||||||
import org.springframework.context.annotation.Import;
|
import org.springframework.context.annotation.Import;
|
||||||
import org.springframework.context.annotation.Primary;
|
import org.springframework.context.annotation.Primary;
|
||||||
|
import org.springframework.core.io.ClassPathResource;
|
||||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||||
|
|
||||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||||
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||||
import com.iqser.red.storage.commons.service.StorageService;
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
|
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
|
||||||
@ -50,6 +57,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
|
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
|
||||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||||
|
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
@ExtendWith(SpringExtension.class)
|
@ExtendWith(SpringExtension.class)
|
||||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||||
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
|
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
|
||||||
@ -103,6 +112,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
|||||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||||
.build(),
|
.build(),
|
||||||
|
Type.builder()
|
||||||
|
.id(DOSSIER_AUTHOR_TYPE_ID)
|
||||||
|
.type(DICTIONARY_AUTHOR)
|
||||||
|
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||||
|
.dossierId(TEST_DOSSIER_ID)
|
||||||
|
.hexColor("#ffe184")
|
||||||
|
.isHint(hintTypeMap.get(DICTIONARY_AUTHOR))
|
||||||
|
.isCaseInsensitive(caseInSensitiveMap.get(DICTIONARY_AUTHOR))
|
||||||
|
.isRecommendation(recommendationTypeMap.get(DICTIONARY_AUTHOR))
|
||||||
|
.rank(rankTypeMap.get(DICTIONARY_AUTHOR))
|
||||||
|
.build(),
|
||||||
Type.builder()
|
Type.builder()
|
||||||
.id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID)
|
.id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID)
|
||||||
.type(PUBLISHED_INFORMATION_INDICATOR)
|
.type(PUBLISHED_INFORMATION_INDICATOR)
|
||||||
@ -158,10 +178,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
|||||||
|
|
||||||
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
||||||
.orElseThrow();
|
.orElseThrow();
|
||||||
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
|
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
|
||||||
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
|
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
|
||||||
.orElseThrow();
|
.orElseThrow();
|
||||||
assertThat(asyaLyon1.getSection().startsWith("Paragraph:"));
|
assertThat(asyaLyon1.getSection()).startsWith("Paragraph:");
|
||||||
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
|
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
|
||||||
|
|
||||||
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
|
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
|
||||||
@ -212,10 +232,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
|||||||
|
|
||||||
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
||||||
.orElseThrow();
|
.orElseThrow();
|
||||||
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
|
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
|
||||||
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
|
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
|
||||||
.orElseThrow();
|
.orElseThrow();
|
||||||
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
|
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
|
||||||
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
|
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
|
||||||
|
|
||||||
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
|
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
|
||||||
@ -244,6 +264,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
|||||||
assertThat(dictionary.get(PUBLISHED_INFORMATION_INDICATOR).contains("Press")).isFalse();
|
assertThat(dictionary.get(PUBLISHED_INFORMATION_INDICATOR).contains("Press")).isFalse();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPublishedInformationRemovalAtDossierLevel() throws IOException {
|
public void testPublishedInformationRemovalAtDossierLevel() throws IOException {
|
||||||
|
|
||||||
@ -348,7 +369,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
|||||||
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||||
|
|
||||||
var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY))
|
var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY))
|
||||||
.filter(e -> e.getMatchedRule().startsWith("CBI.16"))
|
.filter(e -> e.getMatchedRule().startsWith("CBI.7"))
|
||||||
.findAny()
|
.findAny()
|
||||||
.orElseThrow();
|
.orElseThrow();
|
||||||
IdRemoval removal = buildIdRemoval(desireeEtAl.getId());
|
IdRemoval removal = buildIdRemoval(desireeEtAl.getId());
|
||||||
@ -365,6 +386,75 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@SneakyThrows
|
||||||
|
void testNerEntitiesAfterReanalysis() {
|
||||||
|
|
||||||
|
String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
|
||||||
|
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));
|
||||||
|
|
||||||
|
ClassPathResource responseJson = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.NER_ENTITIES.json");
|
||||||
|
storageService.storeObject(TenantContext.getTenantId(),
|
||||||
|
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||||
|
responseJson.getInputStream());
|
||||||
|
|
||||||
|
String pdfFile = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.pdf";
|
||||||
|
|
||||||
|
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||||
|
|
||||||
|
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||||
|
request.setAnalysisNumber(1);
|
||||||
|
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
|
||||||
|
mockDictionaryCalls(0L);
|
||||||
|
|
||||||
|
analyzeService.analyze(request);
|
||||||
|
|
||||||
|
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||||
|
|
||||||
|
String nerValue = "Osip S.";
|
||||||
|
var nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
|
||||||
|
.orElseThrow();
|
||||||
|
assertThat(nerEntity.getEngines()).contains(Engine.NER);
|
||||||
|
|
||||||
|
String dictionaryAddValue = "cooperation";
|
||||||
|
ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
|
||||||
|
.value(dictionaryAddValue)
|
||||||
|
.type(DICTIONARY_AUTHOR)
|
||||||
|
.user("user")
|
||||||
|
.addToDossierDictionary(true)
|
||||||
|
.positions(List.of(Rectangle.builder().topLeftX(180.748f).topLeftY(546.564f).width(56.592f).height(15.408f).page(1).build()))
|
||||||
|
.type("dossier_redaction")
|
||||||
|
.fileId(TEST_FILE_ID)
|
||||||
|
.requestDate(OffsetDateTime.now())
|
||||||
|
.annotationId(UUID.randomUUID().toString())
|
||||||
|
.build();
|
||||||
|
request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build());
|
||||||
|
|
||||||
|
request.setAnalysisNumber(2);
|
||||||
|
dossierDictionary.get(DICTIONARY_AUTHOR).add(dictionaryAddValue);
|
||||||
|
reanlysisVersions.put(dictionaryAddValue, 2L);
|
||||||
|
when(dictionaryClient.getVersionForDossier(TEST_DOSSIER_ID)).thenReturn(2L);
|
||||||
|
mockDictionaryCalls(1L);
|
||||||
|
|
||||||
|
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
|
||||||
|
|
||||||
|
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||||
|
|
||||||
|
EntityLogEntry entityLogEntryAdded = entityLog.getEntityLogEntry()
|
||||||
|
.stream()
|
||||||
|
.filter(entityLogEntry -> entityLogEntry.getValue().equals(dictionaryAddValue))
|
||||||
|
.findFirst()
|
||||||
|
.get();
|
||||||
|
assertEquals(EntryState.APPLIED, entityLogEntryAdded.getState());
|
||||||
|
|
||||||
|
nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
|
||||||
|
.orElseThrow();
|
||||||
|
assertThat(nerEntity.getEngines()).contains(Engine.NER);
|
||||||
|
dossierDictionary.get(DICTIONARY_AUTHOR).remove(dictionaryAddValue);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private static IdRemoval buildIdRemoval(String id) {
|
private static IdRemoval buildIdRemoval(String id) {
|
||||||
|
|
||||||
return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build();
|
return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build();
|
||||||
|
|||||||
@ -43,7 +43,17 @@ public class DateConverterTest {
|
|||||||
"28 March 2018 (animal 1 - 5000 mg/kg bw)",
|
"28 March 2018 (animal 1 - 5000 mg/kg bw)",
|
||||||
"28 March 2018 (animal1 - 5000 mg/kg bw)",
|
"28 March 2018 (animal1 - 5000 mg/kg bw)",
|
||||||
"28 August 2018 (animal 1)",
|
"28 August 2018 (animal 1)",
|
||||||
"31 August 2018 (animal 1)");
|
"31 August 2018 (animal 1)",
|
||||||
|
"October, 27 1989",
|
||||||
|
"October, 7 1989",
|
||||||
|
"August 21,1998",
|
||||||
|
"August 2,1998",
|
||||||
|
"1st May 1988",
|
||||||
|
"2nd June 2003",
|
||||||
|
"3rd September 2005",
|
||||||
|
"6th September 2005",
|
||||||
|
"17th August 2005",
|
||||||
|
"22nd August 2035");
|
||||||
|
|
||||||
for (String dateStr : goldenStandardDates) {
|
for (String dateStr : goldenStandardDates) {
|
||||||
Optional<Date> parsedDate = DateConverter.parseDate(dateStr);
|
Optional<Date> parsedDate = DateConverter.parseDate(dateStr);
|
||||||
|
|||||||
@ -361,7 +361,6 @@ class DroolsValidationServiceTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
void testRulesWithBlacklistedKeyword() {
|
void testRulesWithBlacklistedKeyword() {
|
||||||
@ -388,7 +387,7 @@ class DroolsValidationServiceTest {
|
|||||||
when
|
when
|
||||||
$fileAttribute: FileAttribute($label: label, $value: value)
|
$fileAttribute: FileAttribute($label: label, $value: value)
|
||||||
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
|
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
|
||||||
""";
|
""";
|
||||||
String evilRulePart2 = """
|
String evilRulePart2 = """
|
||||||
then
|
then
|
||||||
retract($duplicate);
|
retract($duplicate);
|
||||||
@ -408,11 +407,13 @@ class DroolsValidationServiceTest {
|
|||||||
.forEach(System.out::println);
|
.forEach(System.out::println);
|
||||||
assertFalse(droolsValidation.isCompiled());
|
assertFalse(droolsValidation.isCompiled());
|
||||||
assertEquals(2, droolsValidation.getBlacklistErrorMessages().size());
|
assertEquals(2, droolsValidation.getBlacklistErrorMessages().size());
|
||||||
assertEquals(1, droolsValidation.getBlacklistErrorMessages()
|
assertEquals(1,
|
||||||
|
droolsValidation.getBlacklistErrorMessages()
|
||||||
.get(0).getBlacklistedKeywords().size());
|
.get(0).getBlacklistedKeywords().size());
|
||||||
assertTrue(droolsValidation.getBlacklistErrorMessages()
|
assertTrue(droolsValidation.getBlacklistErrorMessages()
|
||||||
.get(0).getBlacklistedKeywords().contains("TenantContext"));
|
.get(0).getBlacklistedKeywords().contains("TenantContext"));
|
||||||
assertEquals(2, droolsValidation.getBlacklistErrorMessages()
|
assertEquals(2,
|
||||||
|
droolsValidation.getBlacklistErrorMessages()
|
||||||
.get(1).getBlacklistedKeywords().size());
|
.get(1).getBlacklistedKeywords().size());
|
||||||
assertTrue(droolsValidation.getBlacklistErrorMessages()
|
assertTrue(droolsValidation.getBlacklistErrorMessages()
|
||||||
.get(1).getBlacklistedKeywords().contains("TenantContext"));
|
.get(1).getBlacklistedKeywords().contains("TenantContext"));
|
||||||
@ -429,14 +430,39 @@ class DroolsValidationServiceTest {
|
|||||||
.forEach(System.out::println);
|
.forEach(System.out::println);
|
||||||
assertFalse(droolsValidation2.isCompiled());
|
assertFalse(droolsValidation2.isCompiled());
|
||||||
assertEquals(2, droolsValidation2.getBlacklistErrorMessages().size());
|
assertEquals(2, droolsValidation2.getBlacklistErrorMessages().size());
|
||||||
assertEquals(1, droolsValidation2.getBlacklistErrorMessages()
|
assertEquals(1,
|
||||||
|
droolsValidation2.getBlacklistErrorMessages()
|
||||||
.get(0).getBlacklistedKeywords().size());
|
.get(0).getBlacklistedKeywords().size());
|
||||||
assertTrue(droolsValidation2.getBlacklistErrorMessages()
|
assertTrue(droolsValidation2.getBlacklistErrorMessages()
|
||||||
.get(0).getBlacklistedKeywords().contains("TenantContext"));
|
.get(0).getBlacklistedKeywords().contains("TenantContext"));
|
||||||
assertEquals(1, droolsValidation2.getBlacklistErrorMessages()
|
assertEquals(1,
|
||||||
|
droolsValidation2.getBlacklistErrorMessages()
|
||||||
.get(1).getBlacklistedKeywords().size());
|
.get(1).getBlacklistedKeywords().size());
|
||||||
assertTrue(droolsValidation2.getBlacklistErrorMessages()
|
assertTrue(droolsValidation2.getBlacklistErrorMessages()
|
||||||
.get(1).getBlacklistedKeywords().contains("System."));
|
.get(1).getBlacklistedKeywords().contains("System."));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void assertRuleIdentifierDoesNotMatch() {
|
||||||
|
|
||||||
|
String ruleString = RuleManagementResources.getBaseRuleFileString() + """
|
||||||
|
rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)"
|
||||||
|
when
|
||||||
|
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
|
$entity: TextEntity(type() == "CBI_author", dictionaryEntry)
|
||||||
|
then
|
||||||
|
$entity.redact(
|
||||||
|
"CBI.1.0",
|
||||||
|
"Author found",
|
||||||
|
"Article 39(e)(3) of Regulation (EC) No 178/2002"
|
||||||
|
);
|
||||||
|
end
|
||||||
|
""";
|
||||||
|
|
||||||
|
RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(ruleString);
|
||||||
|
|
||||||
|
assertFalse(ruleFileBluePrint.getDroolsValidation().isCompiled());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -129,56 +129,54 @@ rule "CBI.2.0: Do not redact genitive CBI Author"
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.7
|
// Rule unit: CBI.7
|
||||||
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
|
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$section: Section(!hasTables(),
|
$section: Section(containsString("et al."))
|
||||||
hasEntitiesOfType("published_information"),
|
|
||||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
|
||||||
then
|
then
|
||||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
.forEach(redactionEntity -> {
|
.forEach(entity -> {
|
||||||
redactionEntity.skipWithReferences(
|
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||||
"CBI.7.0",
|
dictionary.recommendEverywhere(entity);
|
||||||
"Published Information found in section",
|
|
||||||
$section.getEntitiesOfType("published_information")
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
|
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
$section: Section(containsString("et al."))
|
||||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
|
||||||
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
|
||||||
then
|
then
|
||||||
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
end
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||||
rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
|
dictionary.recommendEverywhere(entity);
|
||||||
when
|
|
||||||
$section: Section(!hasTables(),
|
|
||||||
hasEntitiesOfType("published_information"),
|
|
||||||
hasEntitiesOfType("PII"))
|
|
||||||
then
|
|
||||||
$section.getEntitiesOfType("PII")
|
|
||||||
.forEach(redactionEntity -> {
|
|
||||||
redactionEntity.skipWithReferences(
|
|
||||||
"CBI.7.2",
|
|
||||||
"Published Information found in section",
|
|
||||||
$section.getEntitiesOfType("published_information")
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.7.3: Do not redact PII if published information found in same table row"
|
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
|
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
$section: Section(containsString("et al."))
|
||||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
|
||||||
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
|
|
||||||
then
|
then
|
||||||
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
|
end
|
||||||
|
|
||||||
|
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
|
when
|
||||||
|
$section: Section(containsString("et al."))
|
||||||
|
then
|
||||||
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -289,54 +287,56 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.16
|
// Rule unit: CBI.16
|
||||||
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
|
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
$section: Section(containsString("et al."))
|
$section: Section(!hasTables(),
|
||||||
|
hasEntitiesOfType("published_information"),
|
||||||
|
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||||
.forEach(entity -> {
|
.forEach(redactionEntity -> {
|
||||||
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
redactionEntity.skipWithReferences(
|
||||||
dictionary.recommendEverywhere(entity);
|
"CBI.16.0",
|
||||||
|
"Published Information found in section",
|
||||||
|
$section.getEntitiesOfType("published_information")
|
||||||
|
);
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
||||||
$section: Section(containsString("et al."))
|
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||||
|
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||||
|
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
||||||
.forEach(entity -> {
|
end
|
||||||
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
|
||||||
|
when
|
||||||
|
$section: Section(!hasTables(),
|
||||||
|
hasEntitiesOfType("published_information"),
|
||||||
|
hasEntitiesOfType("PII"))
|
||||||
|
then
|
||||||
|
$section.getEntitiesOfType("PII")
|
||||||
|
.forEach(redactionEntity -> {
|
||||||
|
redactionEntity.skipWithReferences(
|
||||||
|
"CBI.16.2",
|
||||||
|
"Published Information found in section",
|
||||||
|
$section.getEntitiesOfType("published_information")
|
||||||
|
);
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
rule "CBI.16.3: Do not redact PII if published information found in same table row"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
|
||||||
$section: Section(containsString("et al."))
|
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||||
|
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||||
|
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
|
||||||
|
|
||||||
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
|
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
|
||||||
$section: Section(containsString("et al."))
|
|
||||||
then
|
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -312,56 +312,54 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.7
|
// Rule unit: CBI.7
|
||||||
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
|
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$section: Section(!hasTables(),
|
$section: Section(containsString("et al."))
|
||||||
hasEntitiesOfType("published_information"),
|
|
||||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
|
||||||
then
|
then
|
||||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
.forEach(redactionEntity -> {
|
.forEach(entity -> {
|
||||||
redactionEntity.skipWithReferences(
|
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||||
"CBI.7.0",
|
dictionary.recommendEverywhere(entity);
|
||||||
"Published Information found in section",
|
|
||||||
$section.getEntitiesOfType("published_information")
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
|
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
$section: Section(containsString("et al."))
|
||||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
|
||||||
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
|
||||||
then
|
then
|
||||||
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
end
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||||
rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
|
dictionary.recommendEverywhere(entity);
|
||||||
when
|
|
||||||
$section: Section(!hasTables(),
|
|
||||||
hasEntitiesOfType("published_information"),
|
|
||||||
hasEntitiesOfType("PII"))
|
|
||||||
then
|
|
||||||
$section.getEntitiesOfType("PII")
|
|
||||||
.forEach(redactionEntity -> {
|
|
||||||
redactionEntity.skipWithReferences(
|
|
||||||
"CBI.7.2",
|
|
||||||
"Published Information found in section",
|
|
||||||
$section.getEntitiesOfType("published_information")
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.7.3: Do not redact PII if published information found in same table row"
|
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
|
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
$section: Section(containsString("et al."))
|
||||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
|
||||||
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
|
|
||||||
then
|
then
|
||||||
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
|
end
|
||||||
|
|
||||||
|
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
|
when
|
||||||
|
$section: Section(containsString("et al."))
|
||||||
|
then
|
||||||
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -654,54 +652,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.16
|
// Rule unit: CBI.16
|
||||||
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
|
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
$section: Section(containsString("et al."))
|
$section: Section(!hasTables(),
|
||||||
|
hasEntitiesOfType("published_information"),
|
||||||
|
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||||
.forEach(entity -> {
|
.forEach(redactionEntity -> {
|
||||||
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
redactionEntity.skipWithReferences(
|
||||||
dictionary.recommendEverywhere(entity);
|
"CBI.16.0",
|
||||||
|
"Published Information found in section",
|
||||||
|
$section.getEntitiesOfType("published_information")
|
||||||
|
);
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
||||||
$section: Section(containsString("et al."))
|
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||||
|
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||||
|
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
||||||
.forEach(entity -> {
|
end
|
||||||
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
|
||||||
|
when
|
||||||
|
$section: Section(!hasTables(),
|
||||||
|
hasEntitiesOfType("published_information"),
|
||||||
|
hasEntitiesOfType("PII"))
|
||||||
|
then
|
||||||
|
$section.getEntitiesOfType("PII")
|
||||||
|
.forEach(redactionEntity -> {
|
||||||
|
redactionEntity.skipWithReferences(
|
||||||
|
"CBI.16.2",
|
||||||
|
"Published Information found in section",
|
||||||
|
$section.getEntitiesOfType("published_information")
|
||||||
|
);
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
rule "CBI.16.3: Do not redact PII if published information found in same table row"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
|
||||||
$section: Section(containsString("et al."))
|
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||||
|
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||||
|
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
|
||||||
|
|
||||||
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
|
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
|
||||||
$section: Section(containsString("et al."))
|
|
||||||
then
|
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -129,30 +129,30 @@ rule "CBI.2.0: Do not redact genitive CBI Author"
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.7
|
// Rule unit: CBI.7
|
||||||
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
|
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$section: Section(!hasTables(),
|
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
hasEntitiesOfType("published_information"),
|
$section: Section(containsString("et al."))
|
||||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
|
||||||
then
|
then
|
||||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
.forEach(redactionEntity -> {
|
.forEach(entity -> {
|
||||||
redactionEntity.skipWithReferences(
|
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||||
"CBI.7.0",
|
dictionary.recommendEverywhere(entity);
|
||||||
"Published Information found in section",
|
|
||||||
$section.getEntitiesOfType("published_information")
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
|
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
$section: Section(containsString("et al."))
|
||||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
|
||||||
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
|
||||||
then
|
then
|
||||||
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -224,30 +224,30 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.16
|
// Rule unit: CBI.16
|
||||||
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$section: Section(!hasTables(),
|
||||||
$section: Section(containsString("et al."))
|
hasEntitiesOfType("published_information"),
|
||||||
|
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||||
.forEach(entity -> {
|
.forEach(redactionEntity -> {
|
||||||
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
redactionEntity.skipWithReferences(
|
||||||
dictionary.recommendEverywhere(entity);
|
"CBI.16.0",
|
||||||
|
"Published Information found in section",
|
||||||
|
$section.getEntitiesOfType("published_information")
|
||||||
|
);
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
||||||
$section: Section(containsString("et al."))
|
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||||
|
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||||
|
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -214,6 +214,58 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
// Rule unit: CBI.7
|
||||||
|
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
|
when
|
||||||
|
$section: Section(containsString("et al."))
|
||||||
|
then
|
||||||
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
|
end
|
||||||
|
|
||||||
|
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
|
when
|
||||||
|
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
|
$section: Section(containsString("et al."))
|
||||||
|
then
|
||||||
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
|
end
|
||||||
|
|
||||||
|
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
|
when
|
||||||
|
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
|
$section: Section(containsString("et al."))
|
||||||
|
then
|
||||||
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
|
end
|
||||||
|
|
||||||
|
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
|
when
|
||||||
|
$section: Section(containsString("et al."))
|
||||||
|
then
|
||||||
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.8
|
// Rule unit: CBI.8
|
||||||
rule "CBI.8.0: Redacted because Section contains must_redact entity"
|
rule "CBI.8.0: Redacted because Section contains must_redact entity"
|
||||||
when
|
when
|
||||||
@ -424,58 +476,6 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.16
|
|
||||||
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
|
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
|
||||||
$section: Section(containsString("et al."))
|
|
||||||
then
|
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
|
||||||
|
|
||||||
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
|
||||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
|
||||||
$section: Section(containsString("et al."))
|
|
||||||
then
|
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
|
||||||
|
|
||||||
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
|
||||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
|
||||||
$section: Section(containsString("et al."))
|
|
||||||
then
|
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
|
||||||
|
|
||||||
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
|
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
|
||||||
$section: Section(containsString("et al."))
|
|
||||||
then
|
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.17
|
// Rule unit: CBI.17
|
||||||
rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
|
rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
|
||||||
when
|
when
|
||||||
|
|||||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -4,11 +4,13 @@ import java.io.File;
|
|||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory;
|
import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory;
|
||||||
import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser;
|
import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser;
|
||||||
import com.knecon.fforesight.utility.rules.management.models.BasicRule;
|
import com.knecon.fforesight.utility.rules.management.models.BasicRule;
|
||||||
import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint;
|
import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint;
|
||||||
|
import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier;
|
||||||
import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO;
|
import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
@ -21,17 +23,15 @@ import lombok.experimental.UtilityClass;
|
|||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class RuleFileMigrator {
|
public class RuleFileMigrator {
|
||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void migrateFile(File ruleFile) {
|
public void migrateFile(File ruleFile) {
|
||||||
|
|
||||||
RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath()));
|
RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath()));
|
||||||
RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles();
|
RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles();
|
||||||
|
|
||||||
for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) {
|
//replaceRules(ruleFileBluePrint, combinedBluePrint);
|
||||||
List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier());
|
replaceRuleIdentifiers(combinedBluePrint, ruleFileBluePrint);
|
||||||
ruleFileBluePrint.removeRule(ruleToReplace.identifier());
|
|
||||||
rulesToAdd.forEach(ruleFileBluePrint::addRule);
|
|
||||||
}
|
|
||||||
|
|
||||||
String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint);
|
String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint);
|
||||||
String migratedFilePath = ruleFile.getAbsolutePath();
|
String migratedFilePath = ruleFile.getAbsolutePath();
|
||||||
@ -40,4 +40,35 @@ public class RuleFileMigrator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void replaceRules(RuleFileBluePrint ruleFileBluePrint, RuleFileBluePrint combinedBluePrint) {
|
||||||
|
|
||||||
|
for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) {
|
||||||
|
List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier());
|
||||||
|
ruleFileBluePrint.removeRule(ruleToReplace.identifier());
|
||||||
|
rulesToAdd.forEach(ruleFileBluePrint::addRule);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void replaceRuleIdentifiers(RuleFileBluePrint combinedBluePrint, RuleFileBluePrint ruleFileBluePrint) {
|
||||||
|
|
||||||
|
Map<String, String> identifierReplaceMap = Map.of("CBI.7.0", "CBI.16.0", "CBI.7.1", "CBI.16.1", "CBI.7.2", "CBI.16.2", "CBI.7.3", "CBI.16.3");
|
||||||
|
for (String identifier : identifierReplaceMap.keySet()) {
|
||||||
|
RuleIdentifier ruleId = RuleIdentifier.fromString(identifier);
|
||||||
|
RuleIdentifier otherRuleId = RuleIdentifier.fromString(identifierReplaceMap.get(identifier));
|
||||||
|
|
||||||
|
List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(otherRuleId);
|
||||||
|
List<BasicRule> otherRulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleId);
|
||||||
|
boolean removeRules = ruleFileBluePrint.removeRule(ruleId);
|
||||||
|
boolean removeOtherRules = ruleFileBluePrint.removeRule(otherRuleId);
|
||||||
|
if (removeRules) {
|
||||||
|
rulesToAdd.forEach(ruleFileBluePrint::addRule);
|
||||||
|
}
|
||||||
|
if (removeOtherRules) {
|
||||||
|
otherRulesToAdd.forEach(ruleFileBluePrint::addRule);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,15 +7,19 @@ import java.util.List;
|
|||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public record RuleFileBluePrint(String imports, String globals, String queries, List<RuleClass> ruleClasses) {
|
public record RuleFileBluePrint(String imports, String globals, String queries, List<RuleClass> ruleClasses) {
|
||||||
|
|
||||||
public void removeRule(RuleIdentifier ruleIdentifier) {
|
public boolean removeRule(RuleIdentifier ruleIdentifier) {
|
||||||
|
|
||||||
|
AtomicBoolean wasRemoved = new AtomicBoolean(false);
|
||||||
|
|
||||||
findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit())
|
findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit())
|
||||||
.ifPresent(ruleUnit -> {
|
.ifPresent(ruleUnit -> {
|
||||||
ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier));
|
boolean removed = ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier));
|
||||||
|
wasRemoved.set(removed);
|
||||||
if (ruleUnit.rules().isEmpty()) {
|
if (ruleUnit.rules().isEmpty()) {
|
||||||
ruleClass.ruleUnits().remove(ruleUnit);
|
ruleClass.ruleUnits().remove(ruleUnit);
|
||||||
}
|
}
|
||||||
@ -23,7 +27,7 @@ public record RuleFileBluePrint(String imports, String globals, String queries,
|
|||||||
ruleClasses().remove(ruleClass);
|
ruleClasses().remove(ruleClass);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
return wasRemoved.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -312,58 +312,55 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.7
|
// Rule unit: CBI.7
|
||||||
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
|
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$section: Section(!hasTables(),
|
$section: Section(containsString("et al."))
|
||||||
hasEntitiesOfType("published_information"),
|
|
||||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
|
||||||
then
|
then
|
||||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
.forEach(redactionEntity -> {
|
.forEach(entity -> {
|
||||||
redactionEntity.skipWithReferences(
|
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||||
"CBI.7.0",
|
dictionary.recommendEverywhere(entity);
|
||||||
"Published Information found in section",
|
|
||||||
$section.getEntitiesOfType("published_information")
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
|
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
$section: Section(containsString("et al."))
|
||||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
|
||||||
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
|
||||||
then
|
then
|
||||||
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
end
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||||
rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
|
dictionary.recommendEverywhere(entity);
|
||||||
when
|
|
||||||
$section: Section(!hasTables(),
|
|
||||||
hasEntitiesOfType("published_information"),
|
|
||||||
hasEntitiesOfType("PII"))
|
|
||||||
then
|
|
||||||
$section.getEntitiesOfType("PII")
|
|
||||||
.forEach(redactionEntity -> {
|
|
||||||
redactionEntity.skipWithReferences(
|
|
||||||
"CBI.7.2",
|
|
||||||
"Published Information found in section",
|
|
||||||
$section.getEntitiesOfType("published_information")
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.7.3: Do not redact PII if published information found in same table row"
|
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
when
|
when
|
||||||
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
|
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
||||||
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
$section: Section(containsString("et al."))
|
||||||
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
|
||||||
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
|
|
||||||
then
|
then
|
||||||
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
|
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
|
||||||
|
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||||
|
when
|
||||||
|
$section: Section(containsString("et al."))
|
||||||
|
then
|
||||||
|
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
||||||
|
.forEach(entity -> {
|
||||||
|
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
||||||
|
dictionary.recommendEverywhere(entity);
|
||||||
|
});
|
||||||
|
end
|
||||||
|
|
||||||
// Rule unit: CBI.8
|
// Rule unit: CBI.8
|
||||||
rule "CBI.8.0: Redacted because Section contains must_redact entity"
|
rule "CBI.8.0: Redacted because Section contains must_redact entity"
|
||||||
@ -653,54 +650,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
|
|||||||
|
|
||||||
|
|
||||||
// Rule unit: CBI.16
|
// Rule unit: CBI.16
|
||||||
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
|
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
$section: Section(containsString("et al."))
|
$section: Section(!hasTables(),
|
||||||
|
hasEntitiesOfType("published_information"),
|
||||||
|
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||||
.forEach(entity -> {
|
.forEach(redactionEntity -> {
|
||||||
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
redactionEntity.skipWithReferences(
|
||||||
dictionary.recommendEverywhere(entity);
|
"CBI.16.0",
|
||||||
|
"Published Information found in section",
|
||||||
|
$section.getEntitiesOfType("published_information")
|
||||||
|
);
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
|
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
|
||||||
$section: Section(containsString("et al."))
|
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||||
|
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||||
|
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
|
||||||
.forEach(entity -> {
|
end
|
||||||
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
|
||||||
|
when
|
||||||
|
$section: Section(!hasTables(),
|
||||||
|
hasEntitiesOfType("published_information"),
|
||||||
|
hasEntitiesOfType("PII"))
|
||||||
|
then
|
||||||
|
$section.getEntitiesOfType("PII")
|
||||||
|
.forEach(redactionEntity -> {
|
||||||
|
redactionEntity.skipWithReferences(
|
||||||
|
"CBI.16.2",
|
||||||
|
"Published Information found in section",
|
||||||
|
$section.getEntitiesOfType("published_information")
|
||||||
|
);
|
||||||
});
|
});
|
||||||
end
|
end
|
||||||
|
|
||||||
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
|
rule "CBI.16.3: Do not redact PII if published information found in same table row"
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
when
|
||||||
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
|
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
|
||||||
$section: Section(containsString("et al."))
|
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
|
||||||
|
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
|
||||||
|
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
|
||||||
then
|
then
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
|
||||||
|
|
||||||
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
|
|
||||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
|
||||||
when
|
|
||||||
$section: Section(containsString("et al."))
|
|
||||||
then
|
|
||||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
|
|
||||||
.forEach(entity -> {
|
|
||||||
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
|
|
||||||
dictionary.recommendEverywhere(entity);
|
|
||||||
});
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -25,10 +25,12 @@ public class RuleFileMigrationTest {
|
|||||||
|
|
||||||
// Put your redaction service drools paths and dossier-templates paths both RM and DM here
|
// Put your redaction service drools paths and dossier-templates paths both RM and DM here
|
||||||
static final List<String> ruleFileDirs = List.of(
|
static final List<String> ruleFileDirs = List.of(
|
||||||
"/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools",
|
//"/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools",
|
||||||
"/home/kschuettler/iqser/redaction/dossier-templates-v2",
|
// "/Users/maverickstuder/Documents/RedactManager/dossier-templates-v2"
|
||||||
"/home/kschuettler/iqser/fforesight/dossier-templates-v2",
|
"/Users/maverickstuder/Documents/PM"
|
||||||
"/home/kschuettler/iqser/business-logic");
|
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user