Compare commits

...

44 Commits

Author SHA1 Message Date
maverickstuder
557990273d RED-9859: update intersecting nodes on kie session insertion 2024-09-12 12:13:03 +02:00
maverickstuder
63041927fc RED-9859: Redactions found by et. al. rule not skipped with published information
- modify unit test
2024-09-10 14:58:46 +02:00
maverickstuder
10e0c68a1f RED-9859: Redactions found by et. al. rule not skipped with published information
- switch CBI.7.* with and CBI.16.*
2024-09-10 14:58:45 +02:00
Andrei Isvoran
3ea73aa859 Merge branch 'RED-9986-bp' into 'release/4.348.x'
RED-9986 - Add component rules path to be scanned for Javadoc generation

See merge request redactmanager/redaction-service!511
2024-09-09 12:44:01 +02:00
Andrei Isvoran
179ac6d9ad RED-9986 - Add component rules path to be scanned for Javadoc generation 2024-09-09 11:33:31 +03:00
Kilian Schüttler
bdc6ab7e96 Merge branch 'RED-9964' into 'release/4.348.x'
RED-9964: fix errors with images

See merge request redactmanager/redaction-service!505
2024-09-04 09:16:11 +02:00
Kilian Schüttler
e959d60ec0 RED-9964: fix errors with images 2024-09-04 09:16:11 +02:00
Kilian Schüttler
6b6d06d24e Merge branch 'RED-9964' into 'release/4.348.x'
RED-9964: refactor getMainBody() and getMainBodyTextBlock() in Page

See merge request redactmanager/redaction-service!502
2024-09-02 16:51:05 +02:00
Kilian Schüttler
8ac0657795 RED-9964: refactor getMainBody() and getMainBodyTextBlock() in Page 2024-09-02 16:51:04 +02:00
Maverick Studer
dad17bb504 Merge branch 'RED-9865-bp2' into 'release/4.348.x'
RED-9865: fix for case 2

See merge request redactmanager/redaction-service!495
2024-08-23 17:01:21 +02:00
Maverick Studer
f445b7fe69 RED-9865: fix for case 2 2024-08-23 17:01:21 +02:00
Dominique Eifländer
0692cc90e4 Merge branch 'RED-9837-4.1' into 'release/4.348.x'
RED-9837: Fixed not working timeout with endless loop in drools then block

See merge request redactmanager/redaction-service!490
2024-08-19 14:08:31 +02:00
Dominique Eifländer
ab114b0920 RED-9837: Fixed not working timeout with endless loop in drools then block 2024-08-19 13:20:04 +02:00
Dominique Eifländer
7396c04314 Merge branch 'RED-9760-4.1-rules' into 'release/4.348.x'
RED-9760: Do not check blacklisted keywords in Strings

See merge request redactmanager/redaction-service!488
2024-08-13 12:01:49 +02:00
Dominique Eifländer
305cd8f5ac RED-9760: Do not check blacklisted keywords in Strings 2024-08-13 11:11:25 +02:00
Maverick Studer
b4ecbde89e Merge branch 'RED-9782-fix-bp' into 'release/4.348.x'
RED-9782: Automated Analysis should be disabled when uploading a document that...

See merge request redactmanager/redaction-service!484
2024-08-12 18:40:56 +02:00
Maverick Studer
7c31d4f70b RED-9782: Automated Analysis should be disabled when uploading a document that... 2024-08-12 18:40:56 +02:00
Kilian Schüttler
f08654a082 Merge branch 'hotfix' into 'release/4.348.x'
Fix UOE in ComponentDroolsExecutionService

See merge request redactmanager/redaction-service!482
2024-08-12 15:59:14 +02:00
Kilian Schüttler
2cf7f7c7b2 Fix UOE in ComponentDroolsExecutionService 2024-08-12 15:59:14 +02:00
Kilian Schüttler
a51f10b9d1 Merge branch 'RED-9869-bp' into 'release/4.348.x'
RED-9869: allow java.text and find ruleIdentifiers with whitespaces/linebreaks

See merge request redactmanager/redaction-service!480
2024-08-12 15:19:40 +02:00
Kilian Schuettler
8c36035655 RED-9869: allow java.text and find ruleIdentifiers with whitespaces/linebreaks 2024-08-12 12:39:42 +02:00
Maverick Studer
67bb4fe7f9 Merge branch 'hotfixes-dm-release' into 'release/4.348.x'
Hotfixes dm release

See merge request redactmanager/redaction-service!477
2024-08-09 16:52:40 +02:00
Maverick Studer
2a9101306c Hotfixes dm release 2024-08-09 16:52:40 +02:00
Maverick Studer
6ecac11df5 Merge branch 'RED-9857-bp' into 'release/4.348.x'
RED-9857: Add new date format

See merge request redactmanager/redaction-service!475
2024-08-09 10:30:23 +02:00
Maverick Studer
e663fd2f2a RED-9857: Add new date format 2024-08-09 10:30:22 +02:00
Dominique Eifländer
0ef4087b36 Merge branch 'RED-9760-anyheadline-4.1' into 'release/4.348.x'
RED-9760: Changed anyHeadlineContains to act like in the previous version

See merge request redactmanager/redaction-service!473
2024-08-07 15:17:33 +02:00
Dominique Eifländer
bf3ae1606b RED-9760: Changed anyHeadlineContains to act like in the previous version 2024-08-07 14:56:16 +02:00
Maverick Studer
43620f7b52 Merge branch 'RED-9782' into 'release/4.348.x'
RED-9782: Automated Analysis should be disabled when uploading a document that...

See merge request redactmanager/redaction-service!471
2024-08-07 12:26:03 +02:00
Maverick Studer
92fc003576 RED-9782: Automated Analysis should be disabled when uploading a document that... 2024-08-07 12:26:02 +02:00
Dominique Eifländer
ed02a83289 Merge branch 'RED-9782-4.1' into 'release/4.348.x'
Resolve RED-9782 "4.1"

See merge request redactmanager/redaction-service!469
2024-08-02 14:40:19 +02:00
Dominique Eifländer
78f5aaa54e Resolve RED-9782 "4.1" 2024-08-02 14:40:19 +02:00
Andrei Isvoran
acb5b4c308 Merge branch 'RED-9770' into 'release/4.348.x'
RED-9770 - Extend date converter

See merge request redactmanager/redaction-service!467
2024-07-30 10:29:48 +02:00
Andrei Isvoran
61ee1c12ca RED-9770 - Extend date converter 2024-07-30 10:56:58 +03:00
Kilian Schüttler
abec7ae6bf Merge branch 'annotationMode-bp' into 'release/4.348.x'
annotationMode: ignore IDs of manual adds in annotationMode

See merge request redactmanager/redaction-service!466
2024-07-26 14:53:41 +02:00
Kilian Schuettler
afeddb4d91 annotationMode: ignore IDs of manual adds in annotationMode 2024-07-26 14:03:28 +02:00
Dominique Eifländer
359c237943 Merge branch 'RED-9658-mongo-4.1' into 'release/4.348.x'
RED-9658: Fixed wrong mongo database name

See merge request redactmanager/redaction-service!461
2024-07-17 11:02:22 +02:00
Dominique Eifländer
9789943f45 RED-9658: Fixed wrong mongo database name 2024-07-17 10:44:30 +02:00
Andrei Isvoran
f096aab156 Merge branch 'RED-9667' into 'release/4.348.x'
RED-9667 - Extend convert dates

See merge request redactmanager/redaction-service!459
2024-07-16 16:02:20 +02:00
Andrei Isvoran
156b102e87 RED-9667 - Extend convert dates 2024-07-16 16:02:20 +02:00
Andrei Isvoran
180728721a Merge branch 'RED-9496-graceful-shutdown-bp' into 'release/4.348.x'
RED-9496 - Implement graceful shutdown

See merge request redactmanager/redaction-service!457
2024-07-04 13:56:45 +02:00
Andrei Isvoran
fb9d1042ac RED-9496 - Implement graceful shutdown 2024-07-04 14:03:43 +03:00
Corina Olariu
046b4b29b9 Merge branch 'RED-9466-bp' into 'release/4.348.x'
RED-9466 - Adding annotation removes all AI based recommendations until forced re-analysis

See merge request redactmanager/redaction-service!452
2024-06-28 15:31:16 +02:00
Corina Olariu
dce797ef8e RED-9466 - Adding annotation removes all AI based recommendations until forced re-analysis 2024-06-28 15:31:16 +02:00
Kilian Schüttler
8b8dab2a18 RED-9375: use storageId for cache names everywhere, such that name may be updated by a user
(cherry picked from commit c1a2e9dee209413ca7a3738dc746ee2397aa1319)
2024-06-27 16:13:24 +02:00
46 changed files with 2094 additions and 479 deletions

View File

@@ -4,7 +4,7 @@ plugins {
}
description = "redaction-service-api-v1"
val persistenceServiceVersion = "2.439.0"
val persistenceServiceVersion = "2.465.60"
dependencies {
implementation("org.springframework:spring-web:6.0.12")

View File

@@ -12,11 +12,11 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.141.0"
val layoutParserVersion = "0.142.6"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
val persistenceServiceVersion = "2.444.0"
val persistenceServiceVersion = "2.465.60"
val springBootStarterVersion = "3.1.5"
val springCloudVersion = "4.0.4"
val testContainersVersion = "1.19.7"
@@ -43,6 +43,7 @@ dependencies {
implementation("com.iqser.red.commons:storage-commons:2.45.0")
implementation("com.knecon.fforesight:tenant-commons:0.24.0")
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
implementation("com.knecon.fforesight:lifecycle-commons:0.6.0")
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
@@ -130,18 +131,19 @@ tasks.named<BootBuildImage>("bootBuildImage") {
}
}
fun parseDroolsImports(droolsFilePath: String): List<String> {
fun parseDroolsImports(vararg droolsFilePaths: String): List<String> {
val imports = mutableListOf<String>()
val importPattern = Regex("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);")
val desiredPrefix = "com.iqser.red.service.redaction.v1"
File(droolsFilePath).forEachLine { line ->
importPattern.find(line)?.let { matchResult ->
val importPath = matchResult.groupValues[1].trim()
if (importPath.startsWith(desiredPrefix)) {
val formattedPath = importPath.replace('.', '/')
imports.add("$formattedPath.java")
droolsFilePaths.forEach { filePath ->
File(filePath).forEachLine { line ->
importPattern.find(line)?.let { matchResult ->
val importPath = matchResult.groupValues[1].trim()
if (importPath.startsWith(desiredPrefix)) {
val formattedPath = importPath.replace('.', '/')
imports.add("$formattedPath.java")
}
}
}
}
@@ -149,7 +151,11 @@ fun parseDroolsImports(droolsFilePath: String): List<String> {
return imports
}
val droolsImports = parseDroolsImports("redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl")
// Combine imports from both drools files
val droolsImports = parseDroolsImports(
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl",
"redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_component_rules.drl"
)
tasks.register("generateJavaDoc", Javadoc::class) {
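
The combined scan reuses the regex above unchanged; the following is a minimal standalone sketch (Java, for illustration) of the mapping each matched import line goes through. The sample import line is hypothetical:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

class DroolsImportMappingSketch {
    public static void main(String[] args) {
        // Same pattern as in the build script: captures redaction-service imports from a .drl file.
        Pattern importPattern = Pattern.compile("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]+);");
        String line = "import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;"; // hypothetical sample
        Matcher m = importPattern.matcher(line);
        if (m.find()) {
            // Dots become slashes and ".java" is appended, yielding a source path for the Javadoc task.
            System.out.println(m.group(1).replace('.', '/') + ".java");
            // -> com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java
        }
    }
}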

View File

@@ -13,6 +13,7 @@ import org.springframework.boot.context.properties.EnableConfigurationProperties
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cloud.openfeign.EnableFeignClients;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.EnableAspectJAutoProxy;
import org.springframework.context.annotation.Import;
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
@@ -20,6 +21,7 @@ import com.iqser.red.service.dictionarymerge.commons.DictionaryMergeService;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.SharedMongoAutoConfiguration;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.knecon.fforesight.lifecyclecommons.LifecycleAutoconfiguration;
import com.knecon.fforesight.mongo.database.commons.MongoDatabaseCommonsAutoConfiguration;
import com.knecon.fforesight.mongo.database.commons.liquibase.EnableMongoLiquibase;
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
@@ -32,13 +34,14 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@EnableCaching
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class})
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class, LifecycleAutoconfiguration.class})
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, MongoDatabaseCommonsAutoConfiguration.class})
@EnableFeignClients(basePackageClasses = RulesClient.class)
@EnableConfigurationProperties(RedactionServiceSettings.class)
@EnableMongoRepositories(basePackages = "com.iqser.red.service.persistence")
@EnableMongoLiquibase
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, DataSourceAutoConfiguration.class, LiquibaseAutoConfiguration.class, MongoAutoConfiguration.class, MongoDataAutoConfiguration.class})
@EnableAspectJAutoProxy
public class Application {
public static void main(String[] args) {

View File

@@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
@@ -119,9 +120,14 @@ public class RedactionLogToEntityLogMigrationService {
.filter(MigrationEntity::needsManualEntry)
.map(MigrationEntity::buildManualRedactionEntry)
.toList();
idsToMigrateInDb.setManualRedactionEntriesToAdd(manualRedactionEntriesToAdd);
List<String> manualForceRedactionIdsToDelete = entitiesToMigrate.stream()
.filter(MigrationEntity::needsForceDeletion)
.map(MigrationEntity::getNewId)
.toList();
idsToMigrateInDb.setForceRedactionIdsToDelete(manualForceRedactionIdsToDelete);
return new MigratedEntityLog(idsToMigrateInDb, entityLog);
}

View File

@@ -23,6 +23,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualChangeFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
@@ -220,6 +221,11 @@ public final class MigrationEntity {
&& !entityLogEntry.getChanges().isEmpty()
&& entityLogEntry.getChanges().stream().map(Change::getType).toList().get(entityLogEntry.getChanges().size() - 1).equals(ChangeType.REMOVED)) {
entityLogEntry.setState(EntryState.REMOVED);
if (!entityLogEntry.getManualChanges().isEmpty()) {
entityLogEntry.getManualChanges()
.removeIf(manualChange -> manualChange.getManualRedactionType()
.equals(com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE));
}
}
return entityLogEntry;
@@ -466,4 +472,10 @@ public final class MigrationEntity {
.anyMatch(mc -> mc instanceof ManualResizeRedaction && !((ManualResizeRedaction) mc).getUpdateDictionary()) && !(migratedEntity instanceof Image);
}
public boolean needsForceDeletion() {
return manualChanges.stream()
.anyMatch(mc -> mc instanceof ManualForceRedaction) && this.precursorEntity != null && this.precursorEntity.removed();
}
}

View File

@@ -120,7 +120,8 @@ public class PrecursorEntity implements IEntity {
EntityType entityType = getEntityType(entryType);
String value = Optional.ofNullable(importedRedaction.getValue())
.orElse("");
return PrecursorEntity.builder()
PrecursorEntityBuilder precursorEntityBuilder = PrecursorEntity.builder()
.id(importedRedaction.getId())
.value(value)
.entityPosition(rectangleWithPages)
@@ -130,14 +131,21 @@
.orElse(""))
.type(Optional.ofNullable(importedRedaction.getType())
.orElse(IMPORTED_REDACTION_TYPE))
.section(importedRedaction.getManualOverwriteSection())
.section(Optional.ofNullable(importedRedaction.getSection())
.orElse(""))
.entityType(entityType)
.isDictionaryEntry(false)
.isDossierDictionaryEntry(false)
.manualOverwrite(new ManualChangeOverwrite(entityType))
.rectangle(value.isBlank() || entryType.equals(EntryType.IMAGE) || entryType.equals(EntryType.IMAGE_HINT) || entryType.equals(EntryType.AREA))
.manualOverwrite(new ManualChangeOverwrite(entityType, importedRedaction.getManualOverwriteSection()))
.engines(Set.of(Engine.IMPORTED))
.build();
.engines(Set.of(Engine.IMPORTED));
if (importedRedaction.getManualOverwriteSection() != null && !importedRedaction.getManualOverwriteSection().isEmpty()) {
precursorEntityBuilder.section(importedRedaction.getManualOverwriteSection())
.manualOverwrite(new ManualChangeOverwrite(entityType, importedRedaction.getManualOverwriteSection()));
}
return precursorEntityBuilder.build();
}

View File

@@ -288,8 +288,8 @@ public class DocumentTree {
if (treeId.isEmpty()) {
return root;
}
Entry entry = root.children.get(treeId.get(0));
for (int id : treeId.subList(1, treeId.size())) {
Entry entry = root;
for (int id : treeId) {
entry = entry.children.get(id);
}
return entry;

View File

@@ -3,8 +3,10 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
@@ -35,7 +37,7 @@ public class Page {
Integer width;
Integer rotation;
List<SemanticNode> mainBody;
List<AtomicTextBlock> textBlocksOnPage;
Header header;
Footer footer;
@@ -53,13 +55,63 @@
*/
public TextBlock getMainBodyTextBlock() {
return mainBody.stream()
.filter(SemanticNode::isLeaf)
.map(SemanticNode::getTextBlock)
return textBlocksOnPage.stream()
.filter(atb -> !atb.isEmpty())
.collect(new TextBlockCollector());
}
/**
* Retrieves the highest SemanticNodes that appear only on this page. This is achieved by traversing the DocumentTree upwards until a SemanticNode's direct parent is no longer exclusively on this page.
*
* @return A list containing the highest SemanticNodes that appear only on this page.
*/
public List<SemanticNode> getMainBody() {
return textBlocksOnPage.stream()
.map(AtomicTextBlock::getParent)
.map(this::getHighestParentOnlyOnPage)
.distinct()
.toList();
}
/**
* Retrieves the highest SemanticNodes present on the page. There might be multiple, as two or more Main Sections can start on the same page.
* This is achieved by traversing up the document tree and returning all SemanticNodes whose direct parent is the Document.
*
* @return A list of the highest SemanticNodes present on this page
*/
public Stream<SemanticNode> streamHighestSemanticNodesOnPage() {
return textBlocksOnPage.stream()
.map(AtomicTextBlock::getParent)
.map(this::getHighestSemanticNodeOnPage)
.distinct();
}
private SemanticNode getHighestParentOnlyOnPage(SemanticNode node) {
SemanticNode currentNode = node;
while (currentNode.hasParent() && currentNode.getParent().onlyOnPage(this)) {
currentNode = currentNode.getParent();
}
return currentNode;
}
private SemanticNode getHighestSemanticNodeOnPage(SemanticNode node) {
SemanticNode currentNode = node;
while (currentNode.hasParent() //
&& !currentNode.getParent().getType().equals(NodeType.DOCUMENT)) {
currentNode = currentNode.getParent();
}
return currentNode;
}
@Override
public String toString() {
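
A minimal usage sketch contrasting the two traversals; the document variable is hypothetical, the method names come from the diff above:

// Hypothetical usage against a parsed document graph:
for (Page page : document.getPages()) {
    // Highest nodes appearing exclusively on this page; for a Section spanning
    // several pages, the upward traversal stops at its page-local children.
    List<SemanticNode> mainBody = page.getMainBody();
    // Highest nodes present on the page at all: the traversal stops just below
    // the DOCUMENT node, so a multi-page SuperSection touching this page is included.
    List<SemanticNode> highest = page.streamHighestSemanticNodesOnPage().toList();
}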

View File

@@ -1,10 +1,12 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@@ -33,7 +35,6 @@ import lombok.extern.slf4j.Slf4j;
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
public class Section extends AbstractSemanticNode {
@Override
public NodeType getType() {
@@ -60,7 +61,6 @@ public class Section extends AbstractSemanticNode {
}
@Override
public String toString() {
@@ -85,7 +85,14 @@
*/
public boolean anyHeadlineContainsString(String value) {
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value));
boolean found = streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value)) || getHeadline().containsString(value);
if (!found) {
List<Headline> previousHeadlines = new ArrayList<>();
headlinesByPreviousSibling(this, previousHeadlines);
return previousHeadlines.stream()
.anyMatch(headline -> headline.containsString(value));
}
return true;
}
@@ -97,8 +104,37 @@
*/
public boolean anyHeadlineContainsStringIgnoreCase(String value) {
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
boolean found = streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value)) || getHeadline().containsStringIgnoreCase(value);
if (!found) {
List<Headline> previousHeadlines = new ArrayList<>();
headlinesByPreviousSibling(this, previousHeadlines);
return previousHeadlines.stream()
.anyMatch(headline -> headline.containsStringIgnoreCase(value));
}
return true;
}
private void headlinesByPreviousSibling(SemanticNode section, List<Headline> found) {
if (section.getPreviousSibling()
.isPresent() && section.getPreviousSibling()
.get() instanceof Section previousSection) {
var subnodes = previousSection.streamAllSubNodes()
.toList();
if (subnodes.size() == 1 && subnodes.get(0) instanceof Headline previousHeadline) {
found.add(previousHeadline);
headlinesByPreviousSibling(previousSection, found);
}
}
if (section.getPreviousSibling()
.isPresent() && section.getPreviousSibling()
.get() instanceof Headline previousHeadline) {
found.add(previousHeadline);
headlinesByPreviousSibling(previousHeadline, found);
}
}
}
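
A sketch of the tree shape the new fallback covers; the shape and headline text are hypothetical:

// Section A: contains only the Headline "3. Methods"
// Section B: the queried section, whose own sub-headlines do not match
//
// B.anyHeadlineContainsString("Methods") now returns true, because
// headlinesByPreviousSibling() walks previous siblings and collects such lone
// headlines, recursing so that chains of headline-only sections also work.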

View File

@@ -808,4 +808,17 @@ public interface SemanticNode {
streamChildren().forEach(childNode -> childNode.accept(visitor));
}
/**
* Checks whether this SemanticNode appears on a single page only, and whether that page is the provided one.
*
* @param page the page to check
* @return true if this SemanticNode is on a single page only and that page is the provided one; otherwise false.
*/
default boolean onlyOnPage(Page page) {
Set<Page> pages = getPages();
return pages.size() == 1 && pages.contains(page);
}
}

View File

@@ -110,7 +110,14 @@ public class RedactionMessageReceiver {
log.info("-------------------------------------------------------------------------------------------------");
shouldRespond = false;
break;
case IMPORTED_REDACTIONS_ONLY:
log.info("------------------------------Imported Redactions Analysis Only------------------------------------------");
log.info("Starting Imported Redactions Analysis Only for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.debug(analyzeRequest.getManualRedactions().toString());
result = analyzeService.analyzeImportedRedactionsOnly(analyzeRequest);
log.info("Successful Imported Redactions Analysis Only dossier {} file {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("-------------------------------------------------------------------------------------------------");
break;
default:
throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
}

View File

@@ -23,8 +23,10 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileTyp
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedLegalBases;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.mapper.ImportedLegalBasisMapper;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
@@ -76,6 +78,7 @@ public class AnalyzeService {
ImportedRedactionEntryService importedRedactionEntryService;
ObservedStorageService observedStorageService;
FunctionTimerValues redactmanagerAnalyzePagewiseValues;
ImportedLegalBasisMapper importedLegalBasisMapper = ImportedLegalBasisMapper.INSTANCE;
@Timed("redactmanager_reanalyze")
@@ -128,7 +131,7 @@
document,
document.getNumberOfPages(),
true,
Collections.emptySet());
new HashSet<>());
}
KieWrapper kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
@@ -245,6 +248,39 @@
}
@Timed("redactmanager_analyzeImportedRedactionsOnly")
@Observed(name = "AnalyzeService", contextualName = "analyzeImportedRedactionsOnly")
public AnalyzeResult analyzeImportedRedactionsOnly(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
ImportedLegalBases importedLegalBases = redactionStorageService.getImportedLegalBases(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded Imported Legal Bases for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest, document, notFoundImportedEntries, new DictionaryVersion(0, 0), 0);
entityLogChanges.getEntityLog()
.setLegalBasis(importedLegalBases.getImportedLegalBases()
.stream()
.map(importedLegalBasisMapper::toEntityLogLegalBasis)
.toList());
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
return finalizeAnalysis(analyzeRequest, startTime, KieWrapper.empty(), entityLogChanges, document, document.getNumberOfPages(), false, new HashSet<>());
}
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
long startTime,
KieWrapper kieWrapperComponentRules,
@@ -367,7 +403,7 @@
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
.stream() //
.filter(entry -> sectionsToReanalyseIds.contains(entry.getKey())) //
.filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
}
@@ -383,4 +419,11 @@
return nerEntities;
}
private static Integer getSuperSectionID(String section) {
return NerEntitiesAdapter.sectionNumberToTreeId(section)
.get(0);
}
}
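
A worked example of the relaxed NER filter, assuming sectionNumberToTreeId splits the section number on dots (see the NerEntitiesAdapter change below):

// NER entities are keyed by section numbers such as "3.1.4":
// sectionNumberToTreeId("3.1.4") -> [3, 1, 4]
Integer superSection = getSuperSectionID("3.1.4"); // -> 3
// An entry keyed by subsection "3.1.4" is now kept whenever super section 3 is
// re-analysed, instead of requiring an exact section-id match.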

View File

@@ -223,7 +223,6 @@ public class EntityLogCreatorService {
String type = precursorEntity.getManualOverwrite().getType()
.orElse(precursorEntity.getType());
boolean isHint = isHint(precursorEntity.getEntityType());
return EntityLogEntry.builder()
.id(precursorEntity.getId())
.reason(precursorEntity.buildReasonWithManualChangeDescriptions())

View File

@@ -40,7 +40,8 @@ public class ComponentMappingFileSystemCache {
public File getComponentMappingFile(ComponentMappingMetadata metadata) {
Path mappingFile = getMappingFileFromMetadata(metadata);
Path mappingFileMetaDataFile = mappingFile.resolveSibling(metadata.getName() + METADATA_SUFFIX);
Path mappingFileMetaDataFile = getMappingMetadataFileFromMetadata(metadata);
synchronized (ComponentMappingFileSystemCache.class) {
if (fileExistsAndUpToDate(metadata, mappingFile, mappingFileMetaDataFile)) {
@@ -52,7 +53,7 @@
InputStreamResource inputStreamResource = storageService.getObject(TenantContext.getTenantId(), metadata.getStorageId());
Files.write(mappingFile, inputStreamResource.getContentAsByteArray(), StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE);
mapper.writeValue(mappingFileMetaDataFile.toFile(), metadata);
return mappingFile.toFile();
@@ -87,6 +88,13 @@
}
private Path getMappingMetadataFileFromMetadata(ComponentMappingMetadata metadata) {
Path tenantStem = mappingFileDir.resolve(TenantContext.getTenantId());
return tenantStem.resolve(metadata.getStorageId() + METADATA_SUFFIX);
}
private boolean fileExistsAndUpToDate(ComponentMappingMetadata metadata, Path mappingFile, Path mappingFileMetaDataFile) {
if (mappingFile.toFile().exists() && mappingFile.toFile().isFile() && mappingFileMetaDataFile.toFile().exists() && mappingFileMetaDataFile.toFile().isFile()) {

View File

@@ -4,6 +4,7 @@ import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.time.Duration;
import java.util.List;
import org.springframework.stereotype.Service;
@@ -36,7 +37,7 @@ public class ComponentMappingMemoryCache {
public ComponentMappingMemoryCache(ComponentMappingFileSystemCache componentMappingFileSystemCache) {
this.fileSystemCache = componentMappingFileSystemCache;
cache = CacheBuilder.newBuilder().maximumWeight(MAX_NUMBER_OF_LINES).weigher(COMPONENT_MAPPING_WEIGHER).build();
cache = CacheBuilder.newBuilder().maximumWeight(MAX_NUMBER_OF_LINES).weigher(COMPONENT_MAPPING_WEIGHER).expireAfterAccess(Duration.ofDays(1)).build();
}

View File

@@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -64,7 +63,7 @@ public class DocumentGraphMapper {
for (DocumentStructure.EntryData entryData : entries) {
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
.map(pageNumber -> getPage(pageNumber, context))
.map(context::getPage)
.toList();
SemanticNode node = switch (entryData.getType()) {
@@ -83,6 +82,15 @@
if (entryData.getAtomicBlockIds().length > 0) {
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
node.setLeafTextBlock(textBlock);
switch (entryData.getType()) {
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
default -> textBlock.getAtomicTextBlocks()
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
}
}
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
.toList();
@@ -94,13 +102,8 @@
}
node.setTreeId(treeId);
switch (entryData.getType()) {
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
default -> pages.forEach(page -> page.getMainBody().add(node));
}
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildren(), context)).node(node).build());
}
return newEntries;
}
@@ -115,7 +118,7 @@
private Image buildImage(Context context, Map<String, String> properties, Long[] pageNumbers) {
assert pageNumbers.length == 1;
Page page = getPage(pageNumbers[0], context);
Page page = context.getPage(pageNumbers[0]);
var builder = Image.builder();
PropertiesMapper.parseImageProperties(properties, builder);
return builder.documentTree(context.documentTree).page(page).build();
@@ -161,6 +164,7 @@
return SuperSection.builder().documentTree(context.documentTree).build();
}
private Paragraph buildParagraph(Context context, Map<String, String> properties) {
if (PropertiesMapper.isDuplicateParagraph(properties)) {
@@ -189,21 +193,13 @@
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
context.getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage()));
}
private Page buildPage(DocumentPage p) {
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
}
private Page getPage(Long pageIndex, Context context) {
Page page = context.pageData.get(Math.toIntExact(pageIndex) - 1);
assert page.getNumber() == Math.toIntExact(pageIndex);
return page;
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).textBlocksOnPage(new LinkedList<>()).build();
}
@@ -226,6 +222,14 @@
}
private Page getPage(Long pageIndex) {
Page page = pageData.get(Math.toIntExact(pageIndex) - 1);
assert page.getNumber() == Math.toIntExact(pageIndex);
return page;
}
}
}

View File

@@ -614,21 +614,21 @@ public class EntityCreationService {
/**
* Looks across the remaining table row to the right of the provided TableCell to check whether any line intersects the y coordinates of the found text.
*
* @param TextRanges a list of textRanges
* @param textRanges a list of textRanges
* @param tableCell the table cell
* @param type the type
* @param entityType the entity type
* @param tableNode the table node
* @return a stream of RedactionEntities
*/
private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> TextRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
private Stream<TextEntity> lineAfterBoundariesAcrossColumns(List<TextRange> textRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) {
return TextRanges.stream()
return textRanges.stream()
.map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary)))
.map(bBox -> Pair.of(bBox.getMaxY(), bBox.getMinY()))
.map(maxMinPair -> tableNode.streamRow(tableCell.getRow())
.filter(nextTableCell -> nextTableCell.getCol() > tableCell.getCol())
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesInYRange(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
.map(b -> b.trim(tableNode.getTextBlock()))
.filter(boundary -> isValidEntityTextRange(tableNode.getTextBlock(), boundary))
.map(boundary -> byTextRange(boundary, type, entityType, tableNode))
@@ -1160,6 +1160,10 @@
if (kieSession != null) {
kieSession.insert(textEntity);
textEntity.getIntersectingNodes()
.stream()
.filter(nodesInKieSession::contains)
.forEach(o -> kieSession.update(kieSession.getFactHandle(o), o));
}
}
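
For context, a minimal sketch of the Drools idiom used here; node stands for any fact inserted into the session earlier:

// Updating a fact through its handle (org.kie.api.runtime.rule.FactHandle)
// re-propagates it through the rule network, so rules matching on the node are
// re-evaluated and can now see the freshly inserted TextEntity:
FactHandle handle = kieSession.getFactHandle(node);
kieSession.update(handle, node);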

View File

@@ -11,10 +11,12 @@ import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@@ -35,6 +37,7 @@ public class EntityFromPrecursorCreationService {
static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities
EntityFindingUtility entityFindingUtility;
DictionaryService dictionaryService;
RedactionServiceSettings settings;
public List<PrecursorEntity> createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions manualRedactions, SemanticNode node, String dossierTemplateId) {
@@ -92,7 +95,7 @@
notFoundEntities.add(precursorEntity);
continue;
}
createCorrectEntity(precursorEntity, optionalClosestEntity.get());
createCorrectEntity(precursorEntity, optionalClosestEntity.get(), settings.isAnnotationMode());
}
tempEntitiesByValue.values()
@@ -125,12 +128,16 @@
precursorEntity.getEntityType(),
closestEntity.getDeepestFullyContainingNode());
} else {
correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(),
precursorEntity.type(),
precursorEntity.getEntityType(),
precursorEntity.getId(),
precursorEntity.getManualOverwrite().getSection()
.orElse(null));
String section = precursorEntity.getManualOverwrite().getSection()
.orElse(null);
if ((section == null || section.isBlank())
&& precursorEntity.getSection() != null
&& !precursorEntity.getSection().isBlank()
&& precursorEntity.getEngines().contains(Engine.IMPORTED)) {
section = precursorEntity.getSection();
}
correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId(), section);
}
correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes()));

View File

@@ -177,7 +177,7 @@
}
private static List<Integer> sectionNumberToTreeId(String sectionNumber) {
public static List<Integer> sectionNumberToTreeId(String sectionNumber) {
return Arrays.stream(sectionNumber.split("\\."))
.map(Integer::parseInt)

View File

@@ -11,7 +11,6 @@ import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
@@ -27,7 +26,6 @@ import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncr
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import io.micrometer.core.annotation.Timed;
import lombok.AccessLevel;
@@ -51,7 +49,6 @@ public class SectionFinderService {
long start = System.currentTimeMillis();
Set<Integer> sectionsToReanalyse = new HashSet<>();
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues()
.stream()
.map(DictionaryIncrementValue::getValue)
@@ -82,9 +79,7 @@
return document.getPages()
.stream()
.filter(page -> relevantPagesForReanalysis.contains(page.getNumber()))
.flatMap(page -> Stream.concat(page.getMainBody()
.stream()
.filter(node -> node.getType().equals(NodeType.SECTION)), Stream.of(page.getHeader(), page.getFooter())))
.flatMap(page -> Stream.concat(page.streamHighestSemanticNodesOnPage(), Stream.of(page.getHeader(), page.getFooter())))
.map(node -> node.getTreeId()
.get(0))
.toList();

View File

@@ -73,9 +73,10 @@ public class ComponentDroolsExecutionService {
entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset()));
if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) {
entry.getDuplicatedTextRanges()
.forEach(duplicatedTextRange -> {
entities.add(Entity.fromEntityLogEntry(entry, document, duplicatedTextRange.getStart(), duplicatedTextRange.getEnd()));
});
.forEach(duplicatedTextRange -> entities.add(Entity.fromEntityLogEntry(entry,
document,
duplicatedTextRange.getStart(),
duplicatedTextRange.getEnd())));
}
return entities.stream();
})
@@ -94,8 +95,7 @@
});
try {
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS)
.get();
completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
} catch (ExecutionException e) {
kieSession.dispose();
if (e.getCause() instanceof TimeoutException) {
@@ -105,6 +105,8 @@
} catch (InterruptedException e) {
kieSession.dispose();
throw new RuntimeException(e);
} catch (TimeoutException e) {
throw new DroolsTimeoutException(e, false, RuleFileType.COMPONENT);
}
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
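
The two waiting styles differ in where the timeout surfaces; a standalone sketch, with hypothetical durations:

import java.util.concurrent.*;

class FutureTimeoutSketch {
    public static void main(String[] args) {
        // Stand-in for a Drools session stuck in an endless then-block.
        CompletableFuture<Void> future = CompletableFuture.runAsync(() -> { while (true) { } });
        try {
            // Bounded get(): the waiting thread itself gives up after the deadline
            // and throws TimeoutException directly.
            future.get(1, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            System.out.println("timed out, caller can clean up"); // reached after ~1s
        } catch (InterruptedException | ExecutionException e) {
            throw new RuntimeException(e);
        }
        // With orTimeout(...).get(), the timeout instead arrives as an
        // ExecutionException wrapping TimeoutException, which is why the old code
        // unwrapped e.getCause().
    }
}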

View File

@@ -45,7 +45,7 @@ public class DroolsValidationService {
private final KieContainerCreationService kieContainerCreationService;
private final DeprecatedElementsFinder deprecatedElementsFinder;
private static final Pattern allowedImportsPattern = Pattern.compile(
"^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red|java\\.util)\\..*;$");
"^(?:import\\s+static\\s+)?(?:import\\s+)?(?:com\\.knecon\\.fforesight|com\\.iqser\\.red|java\\.util|java\\.text)\\..*;$");
public static final String LINEBREAK_MATCHER = "\\R";
@@ -283,7 +285,9 @@
private DroolsBlacklistErrorMessage checkAndGetBlackListedMessages(SearchImplementation blacklistedKeywordSearchImplementation, String stringToCheck, int lineIndexStart) {
String sanitizedRuleText = StringUtils.deleteWhitespace(stringToCheck);
String nonWhitespaceRuleText = StringUtils.deleteWhitespace(stringToCheck);
String sanitizedRuleText = nonWhitespaceRuleText.replaceAll("\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'", "");
List<SearchImplementation.MatchPosition> matches = blacklistedKeywordSearchImplementation.getMatches(sanitizedRuleText);
if (!matches.isEmpty()) {
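
A small sketch of the added sanitisation step; the rule text and keyword are hypothetical:

String nonWhitespaceRuleText = "redact(\"X.1.1\",\"java.lang.Runtime\");";
// Quoted string literals are stripped before the blacklist search, so a
// blacklisted keyword that only occurs inside a string value no longer matches:
String sanitizedRuleText = nonWhitespaceRuleText.replaceAll("\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'", "");
// -> redact(,);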

View File

@@ -129,8 +129,7 @@ public class EntityDroolsExecutionService {
});
try {
completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS)
.get();
completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
} catch (ExecutionException e) {
kieSession.dispose();
if (e.getCause() instanceof TimeoutException) {
@@ -140,6 +139,8 @@
} catch (InterruptedException e) {
kieSession.dispose();
throw new RuntimeException(e);
} catch (TimeoutException e) {
throw new DroolsTimeoutException(e, false, RuleFileType.ENTITY);
}
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);

View File

@@ -32,7 +32,7 @@ import lombok.experimental.UtilityClass;
public class RuleFileParser {
private final static Pattern ruleIdentifierInCodeFinder = Pattern.compile(
"\\b(?:redact|apply|skip|remove|ignore|applyWithLineBreaks|applyWithReferences|skipWithReferences)\\s*\\(\"([a-zA-Z0-9]+.\\d+.\\d+)\",.*(?:, .*)?\\)");
"\\b(?:redact|apply|skip|remove|ignore|applyWithLineBreaks|applyWithReferences|skipWithReferences)\\s*\\(\\s*\"([a-zA-Z0-9]+.\\d+.\\d+)\"\\s*,\\s*.*(?:\\s*,\\s*.*)\\s*?\\)");
@SneakyThrows
@@ -78,7 +78,8 @@
.map(GlobalDescr::getLine)
.orElse(0),
allQueries,
ruleClasses, customDroolsValidation);
ruleClasses,
customDroolsValidation);
}
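
A sketch of what the relaxed pattern now accepts; the rule body is hypothetical:

// The old pattern required '(' and the opening quote to be adjacent, so a call
// reformatted across lines was missed. With \s* allowed between the tokens:
String code = "apply(\n    \"CBI.1.1\",\n    $entity,\n    $reason\n)";
boolean found = ruleIdentifierInCodeFinder.matcher(code).find(); // now true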

View File

@@ -9,10 +9,12 @@ import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.context.annotation.Import;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedLegalBases;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactionsPerPage;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@@ -147,6 +149,21 @@
}
@Timed("redactmanager_getImportedLegalBases")
public ImportedLegalBases getImportedLegalBases(String dossierId, String fileId) {
try {
return storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_LEGAL_BASES),
ImportedLegalBases.class);
} catch (StorageObjectDoesNotExist e) {
log.debug("Imported legal bases not available.");
return new ImportedLegalBases();
}
}
@Deprecated(forRemoval = true)
@Timed("redactmanager_getRedactionLog")
public RedactionLog getRedactionLog(String dossierId, String fileId) {

View File

@@ -18,7 +18,7 @@ public class MigratedIdsCollector implements Collector<MigrationEntity, Migrated
@Override
public Supplier<MigratedIds> supplier() {
return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList());
return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList(), Collections.emptyList());
}

View File

@@ -3,8 +3,10 @@ package com.iqser.red.service.redaction.v1.server.utils;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.awt.geom.RectangularShape;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
@@ -154,14 +156,8 @@
*/
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
.map(textBlock::getLineTextRange)
.filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary))
.toList();
if (lineBoundaries.isEmpty()) {
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
}
return TextRange.merge(lineBoundaries);
Predicate<TextRange> isWithinYRange = lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary);
return filterLineBoundaries(textBlock, isWithinYRange);
}
@@ -172,6 +168,49 @@
}
/**
* Identifies all lines within a text block whose vertical coordinates roughly match the provided Y range.
*
* @param maxY The maximum Y-coordinate of the vertical range.
* @param minY The minimum Y-coordinate of the vertical range.
* @param textBlock The text block containing the lines to be checked.
* @return A {@link TextRange} encompassing all lines whose Y-coordinates are close to the specified range.
*/
public static TextRange findTextRangesOfAllLinesWithCloseYCoordinates(Double maxY, Double minY, TextBlock textBlock) {
double averageLineHeight = IntStream.range(0, textBlock.numberOfLines()).boxed()
.map(textBlock::getLineTextRange)
.flatMap((TextRange stringTextRange) -> textBlock.getPositions(stringTextRange)
.stream())
.map(RectangularShape::getHeight)
.mapToDouble(Double::doubleValue).average()
.orElse(0);
Predicate<TextRange> hasCloseYRange = lineBoundary -> areYCoordinatesClose(maxY, minY, textBlock, lineBoundary, averageLineHeight);
return filterLineBoundaries(textBlock, hasCloseYRange);
}
private static boolean areYCoordinatesClose(Double maxY, Double minY, TextBlock textBlock, TextRange lineTextRange, double averageLineHeight) {
Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange));
return Math.abs(lineBBox.getMinY() - minY) <= averageLineHeight && Math.abs(maxY - lineBBox.getMaxY()) <= averageLineHeight;
}
private static TextRange filterLineBoundaries(TextBlock textBlock, Predicate<TextRange> textRangePredicate) {
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
.map(textBlock::getLineTextRange)
.filter(textRangePredicate)
.toList();
if (lineBoundaries.isEmpty()) {
return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start());
}
return TextRange.merge(lineBoundaries);
}
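
A worked example of the tolerance check, with hypothetical coordinates:

// With an average line height of 10, a line whose bounding box spans
// minY = 96 .. maxY = 108 is accepted for the query range minY = 100 .. maxY = 112,
// since |96 - 100| <= 10 and |112 - 108| <= 10; the strict in-range check used by
// findTextRangesOfAllLinesInYRange can reject such a slightly offset line.
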
/**
* Finds TextRanges matching a regex pattern within a TextBlock.
*

View File

@@ -16,6 +16,9 @@ project.version: 1.0-SNAPSHOT
server:
port: 8080
lifecycle:
base-package: com.iqser.red.service.redaction
spring:
application:
name: redaction-service

View File

@@ -14,6 +14,8 @@ dd MMMM yyyy
d MMMM yyyy
MMMM dd, yyyy
MMMM d, yyyy
MMMM, d yyyy
MMMM d,yyyy
dd.MM.yyyy
d.MM.yyyy
yyyy/MM/dd
@@ -28,6 +30,7 @@ dd['.'] MMM yyyy
d['.'] MMM yyyy
dd['th']['st']['nd']['rd'] 'of' MMMM, yyyy
d['th']['st']['nd']['rd'] 'of' MMMM, yyyy
d['st']['nd']['rd']['th'] MMMM yyyy
MMMM dd['th']['st']['nd']['rd'], yyyy
MMMM d['th']['st']['nd']['rd'], yyyy
yyyy, MMMM dd
@@ -72,3 +75,5 @@ dd.MM.yy
d.MM.yy
dd MMM. yyyy
d MMM. yyyy
d-MMMM-yyyy
dd-MMMM-yyyy
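
Assuming these lines feed java.time patterns, a minimal sketch of one added format; the locale is chosen for illustration:

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Locale;

class DateFormatSketch {
    public static void main(String[] args) {
        DateTimeFormatter f = DateTimeFormatter.ofPattern("d-MMMM-yyyy", Locale.ENGLISH);
        System.out.println(LocalDate.parse("9-August-2024", f)); // -> 2024-08-09
    }
}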

View File

@@ -114,13 +114,14 @@ public abstract class AbstractRedactionIntegrationTest {
public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID+ ":" + TEST_DOSSIER_ID;
public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID + ":" + TEST_DOSSIER_ID;
public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String DOSSIER_AUTHOR_TYPE_ID = AUTHOR_TYPE_ID + ":" + TEST_DOSSIER_ID;
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
@@ -250,8 +251,10 @@
true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
true));
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR,
true));
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
PUBLISHED_INFORMATION_INDICATOR,
true));
when(dictionaryClient.getDictionaryForType(DOSSIER_AUTHOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, true));
}
@@ -350,6 +353,7 @@
.collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>());
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")

View File

@@ -9,6 +9,7 @@ import static org.mockito.Mockito.when;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
@@ -17,11 +18,13 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import org.junit.jupiter.api.BeforeEach;
@@ -56,7 +59,6 @@ import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
@@ -80,7 +82,25 @@ import lombok.extern.slf4j.Slf4j;
* This way you can recreate what is happening on the stack almost exactly.
*/
public class AnalysisEnd2EndTest {
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here
// These files will be uploaded if they are present in the folder
public static final Set<FileType> ENDINGS_TO_UPLOAD = Set.of(FileType.ORIGIN,
FileType.DOCUMENT_PAGES,
FileType.DOCUMENT_POSITION,
FileType.DOCUMENT_STRUCTURE,
FileType.DOCUMENT_TEXT,
FileType.IMAGE_INFO,
FileType.NER_ENTITIES,
FileType.TABLES,
FileType.IMPORTED_REDACTIONS);
// These files must be present in the folder or the test will skip the file
public static final Set<FileType> REQUIRED_FILES = Set.of(FileType.ORIGIN,
FileType.DOCUMENT_PAGES,
FileType.DOCUMENT_POSITION,
FileType.DOCUMENT_STRUCTURE,
FileType.DOCUMENT_TEXT);
Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/mainBodyFailed/DOSSIER_TEMPLATE"); // Add your dossier-template here
ObjectMapper mapper = ObjectMapperFactory.create();
final String TENANT_ID = "tenant";
@@ -121,7 +141,7 @@ import lombok.extern.slf4j.Slf4j;
@SneakyThrows
public void runAnalysisEnd2End() {
String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
String folder = "/home/kschuettler/Downloads/mainBodyFailed/728d0af4-f4c4-4bc9-acf8-7d2632b02962/"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
Path absoluteFolderPath;
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
@@ -133,11 +153,14 @@
log.info("Starting end2end analyses for all distinct filenames in folder: {}", folder);
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
log.info("Found {} distinct fileIds", analyzeRequests.size());
log.info("Found {} distinct fileIds with all required files", analyzeRequests.size());
for (int i = 0; i < analyzeRequests.size(); i++) {
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
log.info("----------------------------------------------------------------------------------");
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
analyzeService.analyze(analyzeRequest);
log.info("----------------------------------------------------------------------------------");
log.info("");
}
}
@@ -188,22 +211,36 @@ import lombok.extern.slf4j.Slf4j;
@SneakyThrows
private List<AnalyzeRequest> prepareStorageForFolder(Path folder) {
return Files.list(folder)
.map(this::parseFileId)
.distinct()
return findOriginFiles(folder).stream()
.map(fileId -> prepareStorageForFile(fileId, folder))
.filter(Optional::isPresent)
.map(Optional::get)
.toList();
}
private String parseFileId(Path path) {
private Set<String> findOriginFiles(Path folder) throws IOException {
return path.getFileName().toString().split("\\.")[0];
return Files.walk(folder)
.map(this::parseFileName)
.filter(Objects::nonNull)
.collect(Collectors.toSet());
}
private String parseFileName(Path path) {
String suffix = ".ORIGIN.pdf";
if (!path.getFileName().toString().endsWith(suffix)) {
return null;
}
return path.getFileName().toString().replace(suffix, "");
}
@SneakyThrows
private AnalyzeRequest prepareStorageForFile(String fileId, Path folder) {
private Optional<AnalyzeRequest> prepareStorageForFile(String fileName, Path folder) {
AnalyzeRequest request = new AnalyzeRequest();
request.setDossierId(UUID.randomUUID().toString());
@@ -211,53 +248,65 @@ import lombok.extern.slf4j.Slf4j;
request.setDossierTemplateId(testDossierTemplate.id);
request.setAnalysisNumber(-1);
Path manualRedactionFile = folder.resolve(fileId + ".MANUAL_REDACTIONS.json");
Path manualRedactionFile = folder.resolve(fileName + ".MANUAL_REDACTIONS.json");
if (Files.exists(manualRedactionFile)) {
request.setManualRedactions(mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class));
request.setManualRedactions(parseManualRedactions(manualRedactionFile));
} else {
request.setManualRedactions(new ManualRedactions());
}
Set<FileType> endingsToUpload = Set.of("ORIGIN",
"DOCUMENT_PAGES",
"DOCUMENT_POSITION",
"DOCUMENT_STRUCTURE",
"DOCUMENT_TEXT",
"IMAGE_INFO",
"NER_ENTITIES",
"TABLES",
"IMPORTED_REDACTIONS")
.stream()
.map(FileType::valueOf)
.collect(Collectors.toSet());
Set<FileType> uploadedFileTypes = Files.walk(folder)
.filter(path -> path.toFile().isFile())
.filter(path -> parseFileTypeFromPath(path).map(endingsToUpload::contains)
.orElse(false))
.map(filePath -> uploadFile(filePath, request))
.filter(Optional::isPresent)
.map(Optional::get)
Set<FileType> uploadedFileTypes = findFilesToUpload(fileName, folder, ENDINGS_TO_UPLOAD).map(filePath -> uploadFile(filePath, request))
.map(FileToUpload::fileType)
.collect(Collectors.toUnmodifiableSet());
Set<FileType> missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes);
Set<FileType> missingFileTypes = Sets.difference(REQUIRED_FILES, uploadedFileTypes);
if (!missingFileTypes.isEmpty()) {
log.error("Folder {} is missing files of type {}",
folder.toFile(),
missingFileTypes.stream()
.map(Enum::toString)
.collect(Collectors.joining(", ")));
throw new NotFoundException("Not all required file types are present.");
return Optional.empty();
}
return request;
return Optional.of(request);
}
private static Optional<FileType> parseFileTypeFromPath(Path path) {
private static Stream<FileToUpload> findFilesToUpload(String fileName, Path folder, Set<FileType> endingsToUpload) throws IOException {
return Files.walk(folder)
.filter(path -> path.toFile().isFile())
.map(path -> parseFileTypeFromPath(path, fileName, endingsToUpload))
.filter(Optional::isPresent)
.map(Optional::get);
}
private ManualRedactions parseManualRedactions(Path manualRedactionFile) {
String fileType = path.getFileName().toString().split("\\.")[1];
try {
return Optional.of(FileType.valueOf(fileType));
return mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class);
} catch (IOException e) {
log.error("Could not parse manual redactions");
return new ManualRedactions();
}
}
private static Optional<FileToUpload> parseFileTypeFromPath(Path path, String fileName, Set<FileType> endingsToUpload) {
if (!path.getFileName().toString().startsWith(fileName)) {
return Optional.empty();
}
try {
String fileTypeString = path.getFileName().toString().split("\\.")[1];
FileType fileType = FileType.valueOf(fileTypeString);
if (!endingsToUpload.contains(fileType)) {
return Optional.empty();
}
return Optional.of(new FileToUpload(path, fileType));
} catch (IllegalArgumentException e) {
return Optional.empty();
}
@@ -265,21 +314,26 @@ import lombok.extern.slf4j.Slf4j;
@SneakyThrows
private Optional<FileType> uploadFile(Path path, AnalyzeRequest request) {
private FileToUpload uploadFile(FileToUpload fileToUpload, AnalyzeRequest request) {
Optional<FileType> fileType = parseFileTypeFromPath(path);
if (fileType.isEmpty()) {
return Optional.empty();
if (fileToUpload.path().getFileName().toString().endsWith(".gz")) {
try (var fis = new FileInputStream(fileToUpload.path().toFile()); var in = new GZIPInputStream(fis);) {
storageService.storeObject(TENANT_ID,
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
in);
}
} else {
try (var in = new FileInputStream(fileToUpload.path().toFile())) {
storageService.storeObject(TENANT_ID,
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileToUpload.fileType()),
in);
}
}
try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis);) {
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in);
}
return fileType;
return fileToUpload;
}
private class TestDossierTemplate {
public class TestDossierTemplate {
String id;
Dictionary testDictionary;
@@ -379,4 +433,8 @@ import lombok.extern.slf4j.Slf4j;
}
private record FileToUpload(Path path, FileType fileType) {
}
}
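
Note on the refactor above: everything is keyed off a file-naming convention, <fileId>.<FILE_TYPE>.<extension>[.gz], with the FileType taken from the second dot-separated token. A minimal self-contained sketch of that parsing; the trimmed FileType enum and the sample names are illustrative, not the service's real ones:

import java.nio.file.Path;
import java.util.Optional;

class FileTypeParsingSketch {

    // Trimmed stand-in for the real FileType enum.
    enum FileType { ORIGIN, DOCUMENT_TEXT, NER_ENTITIES }

    // Mirrors parseFileTypeFromPath: token[1] names the FileType, and unknown
    // tokens fall through to Optional.empty() via the IllegalArgumentException catch.
    static Optional<FileType> fileTypeOf(Path path) {
        String[] tokens = path.getFileName().toString().split("\\.");
        if (tokens.length < 2) {
            return Optional.empty();
        }
        try {
            return Optional.of(FileType.valueOf(tokens[1]));
        } catch (IllegalArgumentException e) {
            return Optional.empty();
        }
    }

    public static void main(String[] args) {
        System.out.println(fileTypeOf(Path.of("728d0af4.ORIGIN.pdf")));            // Optional[ORIGIN]
        System.out.println(fileTypeOf(Path.of("728d0af4.DOCUMENT_TEXT.json.gz"))); // Optional[DOCUMENT_TEXT]
        System.out.println(fileTypeOf(Path.of("readme.txt")));                     // Optional.empty
    }
}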


@@ -174,6 +174,16 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
expectedDates.add("03/08/1992");
expectedDates.add("13/08/1992");
expectedDates.add("27/02/1992");
expectedDates.add("27/10/1989");
expectedDates.add("07/10/1989");
expectedDates.add("21/08/1998");
expectedDates.add("02/08/1998");
expectedDates.add("01/05/1988");
expectedDates.add("02/06/2003");
expectedDates.add("03/09/2005");
expectedDates.add("06/09/2005");
expectedDates.add("17/08/2005");
expectedDates.add("22/08/2035");
String dates = experimentalDates.getComponentValues()
.get(0).getValue();


@@ -107,7 +107,7 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest {
@SneakyThrows
public void testSave() {
MigratedIds ids = new MigratedIds(new LinkedList<>(), null);
MigratedIds ids = new MigratedIds(new LinkedList<>(), null, null);
ids.addMapping("123", "321");
ids.addMapping("123", "321");
ids.addMapping("123", "321");


@@ -10,6 +10,7 @@ import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Stream;
import org.junit.jupiter.api.BeforeEach;
@@ -25,24 +26,30 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
@@ -50,6 +57,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
@@ -103,6 +112,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build(),
Type.builder()
.id(DOSSIER_AUTHOR_TYPE_ID)
.type(DICTIONARY_AUTHOR)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.hexColor("#ffe184")
.isHint(hintTypeMap.get(DICTIONARY_AUTHOR))
.isCaseInsensitive(caseInSensitiveMap.get(DICTIONARY_AUTHOR))
.isRecommendation(recommendationTypeMap.get(DICTIONARY_AUTHOR))
.rank(rankTypeMap.get(DICTIONARY_AUTHOR))
.build(),
Type.builder()
.id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID)
.type(PUBLISHED_INFORMATION_INDICATOR)
@@ -158,10 +178,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow();
assertThat(asyaLyon1.getSection().startsWith("Paragraph:"));
assertThat(asyaLyon1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@@ -212,10 +232,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@@ -244,6 +264,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
assertThat(dictionary.get(PUBLISHED_INFORMATION_INDICATOR).contains("Press")).isFalse();
}
@Test
public void testPublishedInformationRemovalAtDossierLevel() throws IOException {
@@ -348,7 +369,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY))
.filter(e -> e.getMatchedRule().startsWith("CBI.16"))
.filter(e -> e.getMatchedRule().startsWith("CBI.7"))
.findAny()
.orElseThrow();
IdRemoval removal = buildIdRemoval(desireeEtAl.getId());
@@ -365,6 +386,75 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
}
@Test
@SneakyThrows
void testNerEntitiesAfterReanalysis() {
String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));
ClassPathResource responseJson = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.NER_ENTITIES.json");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
String pdfFile = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.pdf";
AnalyzeRequest request = uploadFileToStorage(pdfFile);
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
request.setAnalysisNumber(1);
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
mockDictionaryCalls(0L);
analyzeService.analyze(request);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
String nerValue = "Osip S.";
var nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
.orElseThrow();
assertThat(nerEntity.getEngines()).contains(Engine.NER);
String dictionaryAddValue = "cooperation";
ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
.value(dictionaryAddValue)
.type(DICTIONARY_AUTHOR)
.user("user")
.addToDossierDictionary(true)
.positions(List.of(Rectangle.builder().topLeftX(180.748f).topLeftY(546.564f).width(56.592f).height(15.408f).page(1).build()))
.type("dossier_redaction")
.fileId(TEST_FILE_ID)
.requestDate(OffsetDateTime.now())
.annotationId(UUID.randomUUID().toString())
.build();
request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build());
request.setAnalysisNumber(2);
dossierDictionary.get(DICTIONARY_AUTHOR).add(dictionaryAddValue);
reanlysisVersions.put(dictionaryAddValue, 2L);
when(dictionaryClient.getVersionForDossier(TEST_DOSSIER_ID)).thenReturn(2L);
mockDictionaryCalls(1L);
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
EntityLogEntry entityLogEntryAdded = entityLog.getEntityLogEntry()
.stream()
.filter(entityLogEntry -> entityLogEntry.getValue().equals(dictionaryAddValue))
.findFirst()
.get();
assertEquals(EntryState.APPLIED, entityLogEntryAdded.getState());
nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
.orElseThrow();
assertThat(nerEntity.getEngines()).contains(Engine.NER);
dossierDictionary.get(DICTIONARY_AUTHOR).remove(dictionaryAddValue);
}
private static IdRemoval buildIdRemoval(String id) {
return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build();
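
For context, the findEntityByTypeAndValue helper used throughout these assertions is not part of this diff. A plausible reconstruction based on how the entity log is streamed above; the getType() accessor is an assumption, since only getValue() and getEntityLogEntry() appear in the changed lines, and the real helper in the test's superclass may differ:

import java.util.stream.Stream;

// Hypothetical sketch of the superclass helper, not the actual implementation.
static Stream<EntityLogEntry> findEntityByTypeAndValue(EntityLog entityLog, String type, String value) {
    return entityLog.getEntityLogEntry()
            .stream()
            .filter(entry -> type.equals(entry.getType()))    // getType() assumed
            .filter(entry -> value.equals(entry.getValue()));
}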


@@ -43,7 +43,17 @@ public class DateConverterTest {
"28 March 2018 (animal 1 - 5000 mg/kg bw)",
"28 March 2018 (animal1 - 5000 mg/kg bw)",
"28 August 2018 (animal 1)",
"31 August 2018 (animal 1)");
"31 August 2018 (animal 1)",
"October, 27 1989",
"October, 7 1989",
"August 21,1998",
"August 2,1998",
"1st May 1988",
"2nd June 2003",
"3rd September 2005",
"6th September 2005",
"17th August 2005",
"22nd August 2035");
for (String dateStr : goldenStandardDates) {
Optional<Date> parsedDate = DateConverter.parseDate(dateStr);
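
The ten new golden-standard strings exercise three shapes: a comma after the month ("October, 27 1989"), a missing space after the comma ("August 2,1998"), and ordinal day suffixes ("1st May 1988"). One way to cover them is to normalise before a single formatter; the sketch below is written under that assumption and is not DateConverter's actual implementation:

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Locale;

class DateNormalizationSketch {

    // Strip ordinal suffixes and normalise comma/space placement so one
    // "d MMMM yyyy" formatter handles all the new golden-standard shapes.
    static LocalDate parse(String raw) {
        String s = raw
                .replaceAll("(\\d)(st|nd|rd|th)\\b", "$1")                    // "1st May 1988"    -> "1 May 1988"
                .replaceAll("(\\p{L}+),\\s*(\\d{1,2}) (\\d{4})", "$2 $1 $3")  // "October, 27 1989" -> "27 October 1989"
                .replaceAll("(\\p{L}+) (\\d{1,2}),\\s*(\\d{4})", "$2 $1 $3"); // "August 2,1998"    -> "2 August 1998"
        return LocalDate.parse(s, DateTimeFormatter.ofPattern("d MMMM yyyy", Locale.ENGLISH));
    }

    public static void main(String[] args) {
        System.out.println(parse("October, 27 1989")); // 1989-10-27
        System.out.println(parse("August 2,1998"));    // 1998-08-02
        System.out.println(parse("22nd August 2035")); // 2035-08-22
    }
}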


@@ -361,7 +361,6 @@ class DroolsValidationServiceTest {
}
@Test
@SneakyThrows
void testRulesWithBlacklistedKeyword() {
@@ -379,16 +378,16 @@ class DroolsValidationServiceTest {
String rulesString1 = rulesString.substring(0, indexGlobalStart) + importTenantContext + rulesString.substring(indexGlobalStart);
String evilRulePart1 = """
//------------------------------------ All the evil rules ------------------------------------
// Rule unit: EV.1
rule "EV.1.0: Remove duplicate FileAttributes but also do very evil things"
salience 999
when
$fileAttribute: FileAttribute($label: label, $value: value)
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
""";
""";
String evilRulePart2 = """
then
retract($duplicate);
@@ -408,11 +407,13 @@ class DroolsValidationServiceTest {
.forEach(System.out::println);
assertFalse(droolsValidation.isCompiled());
assertEquals(2, droolsValidation.getBlacklistErrorMessages().size());
assertEquals(1, droolsValidation.getBlacklistErrorMessages()
assertEquals(1,
droolsValidation.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().size());
assertTrue(droolsValidation.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().contains("TenantContext"));
assertEquals(2, droolsValidation.getBlacklistErrorMessages()
assertEquals(2,
droolsValidation.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().size());
assertTrue(droolsValidation.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().contains("TenantContext"));
@@ -429,14 +430,39 @@ class DroolsValidationServiceTest {
.forEach(System.out::println);
assertFalse(droolsValidation2.isCompiled());
assertEquals(2, droolsValidation2.getBlacklistErrorMessages().size());
assertEquals(1, droolsValidation2.getBlacklistErrorMessages()
assertEquals(1,
droolsValidation2.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().size());
assertTrue(droolsValidation2.getBlacklistErrorMessages()
.get(0).getBlacklistedKeywords().contains("TenantContext"));
assertEquals(1, droolsValidation2.getBlacklistErrorMessages()
assertEquals(1,
droolsValidation2.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().size());
assertTrue(droolsValidation2.getBlacklistErrorMessages()
.get(1).getBlacklistedKeywords().contains("System."));
}
@Test
void assertRuleIdentifierDoesNotMatch() {
String ruleString = RuleManagementResources.getBaseRuleFileString() + """
rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$entity: TextEntity(type() == "CBI_author", dictionaryEntry)
then
$entity.redact(
"CBI.1.0",
"Author found",
"Article 39(e)(3) of Regulation (EC) No 178/2002"
);
end
""";
RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(ruleString);
assertFalse(ruleFileBluePrint.getDroolsValidation().isCompiled());
}
}
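
assertRuleIdentifierDoesNotMatch feeds the parser a rule whose name says CBI.0.0 but whose then-block redacts under CBI.1.0, and expects validation to fail. A hedged sketch of how such a mismatch can be detected; the real check lives in RuleFileParser and is certainly more involved:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

class RuleIdentifierCheckSketch {

    private static final Pattern RULE_NAME = Pattern.compile("rule\\s+\"(CBI\\.\\d+\\.\\d+):");
    private static final Pattern REDACT_ID = Pattern.compile("\\.redact\\(\\s*\"(CBI\\.\\d+\\.\\d+)\"");

    // True when the identifier in the rule name matches the one passed to redact(...).
    static boolean identifiersMatch(String ruleSource) {
        Matcher name = RULE_NAME.matcher(ruleSource);
        Matcher redact = REDACT_ID.matcher(ruleSource);
        return name.find() && redact.find() && name.group(1).equals(redact.group(1));
    }

    public static void main(String[] args) {
        String rule = """
                rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)"
                when
                    $entity: TextEntity(type() == "CBI_author", dictionaryEntry)
                then
                    $entity.redact(
                        "CBI.1.0",
                        "Author found",
                        "Article 39(e)(3) of Regulation (EC) No 178/2002"
                    );
                end
                """;
        System.out.println(identifiersMatch(rule)); // false -> validation should reject the file
    }
}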


@@ -129,56 +129,54 @@ rule "CBI.2.0: Do not redact genitive CBI Author"
// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$section: Section(containsString("et al."))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end
rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Do not redact PII if published information found in same table row"
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
@@ -289,54 +287,56 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study
// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
when
$section: Section(containsString("et al."))
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end
rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.3: Do not redact PII if published information found in same table row"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
end


@@ -312,56 +312,54 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe
// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$section: Section(containsString("et al."))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end
rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Do not redact PII if published information found in same table row"
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
@@ -654,54 +652,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
when
$section: Section(containsString("et al."))
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end
rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.3: Do not redact PII if published information found in same table row"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
end


@@ -129,30 +129,30 @@ rule "CBI.2.0: Do not redact genitive CBI Author"
// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
@@ -224,30 +224,30 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study
// Rule unit: CBI.16
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end


@@ -214,6 +214,58 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
end
// Rule unit: CBI.7
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
// Rule unit: CBI.8
rule "CBI.8.0: Redacted because Section contains must_redact entity"
when
@@ -424,58 +476,6 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
end
// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
// Rule unit: CBI.17
rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
when


@@ -4,11 +4,13 @@ import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory;
import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser;
import com.knecon.fforesight.utility.rules.management.models.BasicRule;
import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint;
import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier;
import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO;
import lombok.SneakyThrows;
@@ -21,17 +23,15 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class RuleFileMigrator {
@SneakyThrows
public void migrateFile(File ruleFile) {
RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath()));
RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles();
for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) {
List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier());
ruleFileBluePrint.removeRule(ruleToReplace.identifier());
rulesToAdd.forEach(ruleFileBluePrint::addRule);
}
//replaceRules(ruleFileBluePrint, combinedBluePrint);
replaceRuleIdentifiers(combinedBluePrint, ruleFileBluePrint);
String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint);
String migratedFilePath = ruleFile.getAbsolutePath();
@@ -40,4 +40,35 @@
}
}
private static void replaceRules(RuleFileBluePrint ruleFileBluePrint, RuleFileBluePrint combinedBluePrint) {
for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) {
List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier());
ruleFileBluePrint.removeRule(ruleToReplace.identifier());
rulesToAdd.forEach(ruleFileBluePrint::addRule);
}
}
private static void replaceRuleIdentifiers(RuleFileBluePrint combinedBluePrint, RuleFileBluePrint ruleFileBluePrint) {
Map<String, String> identifierReplaceMap = Map.of("CBI.7.0", "CBI.16.0", "CBI.7.1", "CBI.16.1", "CBI.7.2", "CBI.16.2", "CBI.7.3", "CBI.16.3");
for (String identifier : identifierReplaceMap.keySet()) {
RuleIdentifier ruleId = RuleIdentifier.fromString(identifier);
RuleIdentifier otherRuleId = RuleIdentifier.fromString(identifierReplaceMap.get(identifier));
List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(otherRuleId);
List<BasicRule> otherRulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleId);
boolean removeRules = ruleFileBluePrint.removeRule(ruleId);
boolean removeOtherRules = ruleFileBluePrint.removeRule(otherRuleId);
if (removeRules) {
rulesToAdd.forEach(ruleFileBluePrint::addRule);
}
if (removeOtherRules) {
otherRulesToAdd.forEach(ruleFileBluePrint::addRule);
}
}
}
}
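
migrateFile rewrites a single rule file in place (since @UtilityClass makes it static, it is called directly on the class). A hedged usage sketch for running it over a whole rules directory; the directory path and the .drl filter are assumptions:

import java.io.File;
import java.util.Objects;

class MigrateAllRuleFiles {
    public static void main(String[] args) {
        File rulesDir = new File("src/main/resources/rules"); // hypothetical location
        File[] ruleFiles = Objects.requireNonNull(rulesDir.listFiles((dir, name) -> name.endsWith(".drl")));
        for (File ruleFile : ruleFiles) {
            // Swaps the CBI.7.* and CBI.16.* rule bodies where present, per the replace map above.
            RuleFileMigrator.migrateFile(ruleFile);
        }
    }
}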


@@ -7,15 +7,19 @@ import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
public record RuleFileBluePrint(String imports, String globals, String queries, List<RuleClass> ruleClasses) {
public void removeRule(RuleIdentifier ruleIdentifier) {
public boolean removeRule(RuleIdentifier ruleIdentifier) {
AtomicBoolean wasRemoved = new AtomicBoolean(false);
findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit())
.ifPresent(ruleUnit -> {
ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier));
boolean removed = ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier));
wasRemoved.set(removed);
if (ruleUnit.rules().isEmpty()) {
ruleClass.ruleUnits().remove(ruleUnit);
}
@@ -23,7 +27,7 @@ public record RuleFileBluePrint(String imports, String globals, String queries,
ruleClasses().remove(ruleClass);
}
}));
return wasRemoved.get();
}
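
removeRule now reports whether anything was actually removed. The AtomicBoolean is needed because the flag is set inside nested ifPresent lambdas, and Java only lets a lambda capture locals that are effectively final; a holder object sidesteps that. A minimal illustration of the constraint:

import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;

class EffectivelyFinalDemo {
    public static void main(String[] args) {
        // boolean removed = false;
        // Optional.of("x").ifPresent(v -> removed = true); // does not compile:
        // "local variables referenced from a lambda expression must be final or effectively final"
        AtomicBoolean removed = new AtomicBoolean(false);
        Optional.of("x").ifPresent(v -> removed.set(true)); // mutate through the holder instead
        System.out.println(removed.get()); // true
    }
}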


@@ -312,58 +312,55 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe
// Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables"
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$section: Section(containsString("et al."))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row"
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end
rule "CBI.7.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Do not redact PII if published information found in same table row"
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
// Rule unit: CBI.8
rule "CBI.8.0: Redacted because Section contains must_redact entity"
@@ -653,54 +650,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
// Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
when
$section: Section(containsString("et al."))
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
$authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
end
rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
});
end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
rule "CBI.16.3: Do not redact PII if published information found in same table row"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
$pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
end
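The "Add CBI_author" rules sit in agenda-group "LOCAL_DICTIONARY_ADDS", so the CBI_author entities they create must already be in working memory when the published-information skip rules fire; per the RED-9859 commit message, redactions found by the et-al rule were not being skipped, and swapping the CBI.7.* and CBI.16.* numbering restores that ordering. A minimal sketch of how a KieSession drives such group-first execution (the session wiring here is an assumption, not this service's actual bootstrap):

import org.kie.api.KieServices;
import org.kie.api.runtime.KieContainer;
import org.kie.api.runtime.KieSession;

// Illustrative driver; the service's real session setup is not shown in this diff.
public class AgendaGroupOrderSketch {
    public static void main(String[] args) {
        KieContainer container = KieServices.Factory.get().getKieClasspathContainer();
        KieSession session = container.newKieSession();
        try {
            // ... insert facts (sections, tables, entities) here ...

            // Give the dictionary-building group focus so the "Add CBI_author"
            // rules fire first; when the group is exhausted, focus falls back
            // to MAIN, where the published-information skip rules run.
            session.getAgenda().getAgendaGroup("LOCAL_DICTIONARY_ADDS").setFocus();
            session.fireAllRules();
        } finally {
            session.dispose();
        }
    }
}

Once the focused group runs dry, focus falls back to the MAIN agenda group within the same fireAllRules() call, where the CBI.7.* skip rules can then deactivate the freshly added entities.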

View File

@@ -25,10 +25,12 @@ public class RuleFileMigrationTest {
 // Put your redaction service drools paths and dossier-templates paths both RM and DM here
 static final List<String> ruleFileDirs = List.of(
-        "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools",
-        "/home/kschuettler/iqser/redaction/dossier-templates-v2",
-        "/home/kschuettler/iqser/fforesight/dossier-templates-v2",
-        "/home/kschuettler/iqser/business-logic");
+        //"/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools",
+        // "/Users/maverickstuder/Documents/RedactManager/dossier-templates-v2"
+        "/Users/maverickstuder/Documents/PM"
+);

 @Test
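RuleFileMigrationTest scans the directories listed in ruleFileDirs for rule files. The test body itself is outside this hunk; a hypothetical sketch of the kind of check such a test could run, compiling every .drl it finds via the KIE builder API (all class and variable names here are assumptions):

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Stream;

import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.Message;

// Hypothetical check: compile every .drl under the configured directories
// and report build errors. The actual test body is not visible in this diff.
public class RuleFileCompileSketch {
    public static void main(String[] args) throws Exception {
        List<String> ruleFileDirs = List.of("/Users/maverickstuder/Documents/PM"); // as in ruleFileDirs above
        KieServices ks = KieServices.Factory.get();
        for (String dir : ruleFileDirs) {
            KieFileSystem kfs = ks.newKieFileSystem();
            try (Stream<Path> files = Files.walk(Paths.get(dir))) {
                files.filter(p -> p.toString().endsWith(".drl"))
                     // name collisions between directories are ignored in this sketch
                     .forEach(p -> kfs.write("src/main/resources/" + p.getFileName(),
                             ks.getResources().newFileSystemResource(p.toFile())));
            }
            KieBuilder builder = ks.newKieBuilder(kfs).buildAll();
            if (builder.getResults().hasMessages(Message.Level.ERROR)) {
                System.err.println(dir + ": " + builder.getResults().getMessages());
            }
        }
    }
}

Hard-coding per-developer checkout paths keeps the scan fast but machine-specific, which is why the kschuettler and maverickstuder entries are swapped in and out of the list above.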