diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java index b895661f..316fa2b1 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.model; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index a45fbb68..08413ee2 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,11 +12,11 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.86.0" +val layoutParserVersion = "0.89.3" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.338.0" +val persistenceServiceVersion = "2.349.7" val springBootStarterVersion = "3.1.5" configurations { @@ -65,6 +65,7 @@ dependencies { testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}") testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}") + testImplementation("com.knecon.fforesight:viewer-doc-processor:${layoutParserVersion}") testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") { exclude( group = 
"com.iqser.red.service", diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java index aaa8a1be..0d141c87 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java @@ -45,7 +45,7 @@ public class LegacyRedactionLogMergeService { public RedactionLog addManualAddEntriesAndRemoveSkippedImported(RedactionLog redactionLog, ManualRedactions manualRedactions, String dossierTemplateId) { Set skippedImportedRedactions = new HashSet<>(); - log.info("Merging Redaction log with manual redactions"); + log.info("Adding manual add Entries and removing skipped or imported entries"); if (manualRedactions != null) { var manualRedactionLogEntries = addManualAddEntries(manualRedactions.getEntriesToAdd(), redactionLog.getAnalysisNumber()); @@ -92,6 +92,10 @@ public class LegacyRedactionLogMergeService { return redactionLog; } + public long getNumberOfAffectedAnnotations(ManualRedactions manualRedactions) { + + return createManualRedactionWrappers(manualRedactions).stream().map(ManualRedactionWrapper::getId).distinct().count(); + } private List createManualRedactionWrappers(ManualRedactions manualRedactions) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java new file mode 100644 index 00000000..c9909086 --- /dev/null +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java @@ -0,0 +1,93 @@ +package com.iqser.red.service.redaction.v1.server.migration; + +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Change; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; + +public class MigrationMapper { + + public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change toEntityLogChanges(Change change) { + + return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change(change.getAnalysisNumber(), + toEntityLogType(change.getType()), + change.getDateTime()); + } + + + public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange toEntityLogManualChanges(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange manualChange) { + + return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange(toManualRedactionType(manualChange.getManualRedactionType()), + manualChange.getProcessedDate(), + manualChange.getRequestedDate(), + manualChange.getUserId(), + manualChange.getPropertyChanges()); + } + + + public static ChangeType 
toEntityLogType(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType type) { + + return switch (type) { + case ADDED -> ChangeType.ADDED; + case REMOVED -> ChangeType.REMOVED; + case CHANGED -> ChangeType.CHANGED; + }; + } + + + public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType toManualRedactionType(ManualRedactionType manualRedactionType) { + + return switch (manualRedactionType) { + case ADD_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD; + case ADD_TO_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_TO_DICTIONARY; + case REMOVE_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE; + case REMOVE_FROM_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_FROM_DICTIONARY; + case FORCE_REDACT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE; + case FORCE_HINT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE; + case RECATEGORIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RECATEGORIZE; + case LEGAL_BASIS_CHANGE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.LEGAL_BASIS_CHANGE; + case RESIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RESIZE; + }; + } + + + public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine toEntityLogEngine(Engine engine) { + + return switch (engine) { + case DICTIONARY -> 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.DICTIONARY; + case NER -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.NER; + case RULE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.RULE; + }; + } + + + public static Set getMigratedEngines(RedactionLogEntry entry) { + + if (entry.getEngines() == null) { + return Collections.emptySet(); + } + return entry.getEngines() + .stream() + .map(MigrationMapper::toEntityLogEngine) + .collect(Collectors.toSet()); + } + + + public List migrateManualChanges(List manualChanges) { + + if (manualChanges == null) { + return Collections.emptyList(); + } + return manualChanges.stream() + .map(MigrationMapper::toEntityLogManualChanges) + .toList(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java index 66e8db83..740d4116 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java @@ -58,19 +58,28 @@ public class MigrationMessageReceiver { if (redactionLog.getAnalysisVersion() == 0) { redactionLog = legacyVersion0MigrationService.mergeDuplicateAnnotationIds(redactionLog); } else if (migrationRequest.getManualRedactions() != null) { - redactionLog = legacyRedactionLogMergeService.addManualAddEntriesAndRemoveSkippedImported(redactionLog, migrationRequest.getManualRedactions(), migrationRequest.getDossierTemplateId()); + redactionLog = 
legacyRedactionLogMergeService.addManualAddEntriesAndRemoveSkippedImported(redactionLog, + migrationRequest.getManualRedactions(), + migrationRequest.getDossierTemplateId()); } - MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(redactionLog, document, migrationRequest.getDossierTemplateId(), migrationRequest.getManualRedactions()); + MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(redactionLog, + document, + migrationRequest.getDossierTemplateId(), + migrationRequest.getManualRedactions(), + migrationRequest.getFileId()); + log.info("Storing migrated entityLog and ids to migrate in DB for file {}", migrationRequest.getFileId()); redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.ENTITY_LOG, migratedEntityLog.getEntityLog()); redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.MIGRATED_IDS, migratedEntityLog.getMigratedIds()); sendFinished(MigrationResponse.builder().dossierId(migrationRequest.getDossierId()).fileId(migrationRequest.getFileId()).build()); - log.info("Migrated {} redactionLog entries for dossierId {} and fileId {}", - migratedEntityLog.getEntityLog().getEntityLogEntry().size(), - migrationRequest.getDossierId(), - migrationRequest.getFileId()); + log.info("Migrated {} redactionLog entries, found {} annotation ids for migration in the db, {} new manual entries, for dossierId {} and fileId {}", + migratedEntityLog.getEntityLog().getEntityLogEntry().size(), + migratedEntityLog.getMigratedIds().getMappings().size(), + migratedEntityLog.getMigratedIds().getManualRedactionEntriesToAdd().size(), + migrationRequest.getDossierId(), + migrationRequest.getFileId()); log.info(""); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java index 938109d4..681b1159 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java @@ -19,29 +19,23 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogLegalBasis; -import com.iqser.red.service.redaction.v1.model.MigrationRequest; -import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.MigratedEntityLog; import com.iqser.red.service.redaction.v1.server.model.MigrationEntity; +import 
com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage; -import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; +import com.iqser.red.service.redaction.v1.server.service.document.EntityFromPrecursorCreationService; import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.utils.MigratedIdsCollector; @@ -59,17 +53,19 @@ public class RedactionLogToEntityLogMigrationService { private static final double MATCH_THRESHOLD = 10; EntityFindingUtility entityFindingUtility; - EntityEnrichmentService entityEnrichmentService; DictionaryService dictionaryService; ManualChangesApplicationService manualChangesApplicationService; - public MigratedEntityLog migrate(RedactionLog redactionLog, Document document, String dossierTemplateId, ManualRedactions manualRedactions) { + public MigratedEntityLog migrate(RedactionLog redactionLog, Document document, String dossierTemplateId, ManualRedactions manualRedactions, 
String fileId) { + + log.info("Migrating entities for file {}", fileId); + List entitiesToMigrate = calculateMigrationEntitiesFromRedactionLog(redactionLog, document, dossierTemplateId, fileId); - List entitiesToMigrate = calculateMigrationEntitiesFromRedactionLog(redactionLog, document, dossierTemplateId); MigratedIds migratedIds = entitiesToMigrate.stream() .collect(new MigratedIdsCollector()); + log.info("applying manual changes to migrated entities for file {}", fileId); applyManualChanges(entitiesToMigrate, manualRedactions); EntityLog entityLog = new EntityLog(); @@ -85,13 +81,16 @@ public class RedactionLogToEntityLogMigrationService { .toList()); Map oldToNewIDMapping = migratedIds.buildOldToNewMapping(); + + log.info("Writing migrated entities to entityLog for file {}", fileId); entityLog.setEntityLogEntry(entitiesToMigrate.stream() .map(migrationEntity -> migrationEntity.toEntityLogEntry(oldToNewIDMapping)) .toList()); - if (getNumberOfApprovedEntries(redactionLog) != entityLog.getEntityLogEntry().size()) { + if (getNumberOfApprovedEntries(redactionLog, document.getNumberOfPages()) != entityLog.getEntityLogEntry().size()) { String message = String.format("Not all entities have been found during the migration redactionLog has %d entries and new entityLog %d", - redactionLog.getRedactionLogEntry().size(), + redactionLog.getRedactionLogEntry() + .size(), entityLog.getEntityLogEntry().size()); log.error(message); throw new AssertionError(message); @@ -102,6 +101,13 @@ public class RedactionLogToEntityLogMigrationService { .filter(m -> !m.getOldId().equals(m.getNewId())) .collect(new MigratedIdsCollector()); + List manualRedactionEntriesToAdd = entitiesToMigrate.stream() + .filter(MigrationEntity::needsManualEntry) + .map(MigrationEntity::buildManualRedactionEntry) + .toList(); + + idsToMigrateInDb.setManualRedactionEntriesToAdd(manualRedactionEntriesToAdd); + return new MigratedEntityLog(idsToMigrateInDb, entityLog); } @@ -117,40 +123,27 @@ public class 
RedactionLogToEntityLogMigrationService { manualRedactions.getForceRedactions(), manualRedactions.getResizeRedactions(), manualRedactions.getLegalBasisChanges(), - manualRedactions.getRecategorizations(), - manualRedactions.getLegalBasisChanges()) + manualRedactions.getRecategorizations()) .flatMap(Collection::stream) .collect(Collectors.groupingBy(BaseAnnotation::getAnnotationId)); - entitiesToMigrate.forEach(migrationEntity -> manualChangesPerAnnotationId.getOrDefault(migrationEntity.getOldId(), Collections.emptyList()) - .forEach(manualChange -> { - if (manualChange instanceof ManualResizeRedaction manualResizeRedaction && migrationEntity.getMigratedEntity() instanceof TextEntity textEntity) { - ManualResizeRedaction migratedManualResizeRedaction = ManualResizeRedaction.builder() - .positions(manualResizeRedaction.getPositions()) - .annotationId(migrationEntity.getNewId()) - .updateDictionary(manualResizeRedaction.getUpdateDictionary()) - .addToAllDossiers(manualResizeRedaction.isAddToAllDossiers()) - .textAfter(manualResizeRedaction.getTextAfter()) - .textBefore(manualResizeRedaction.getTextBefore()) - .build(); - manualChangesApplicationService.resize(textEntity, migratedManualResizeRedaction); - } else { - migrationEntity.getMigratedEntity().getManualOverwrite().addChange(manualChange); - } - })); + entitiesToMigrate.forEach(migrationEntity -> migrationEntity.applyManualChanges(manualChangesPerAnnotationId.getOrDefault(migrationEntity.getOldId(), + Collections.emptyList()), + manualChangesApplicationService)); + } - private static long getNumberOfApprovedEntries(RedactionLog redactionLog) { + private long getNumberOfApprovedEntries(RedactionLog redactionLog, int numberOfPages) { - return redactionLog.getRedactionLogEntry().size(); + return redactionLog.getRedactionLogEntry().stream().filter(redactionLogEntry -> isOnExistingPage(redactionLogEntry, numberOfPages)).collect(Collectors.toList()).size(); } - private List 
calculateMigrationEntitiesFromRedactionLog(RedactionLog redactionLog, Document document, String dossierTemplateId) { + private List calculateMigrationEntitiesFromRedactionLog(RedactionLog redactionLog, Document document, String dossierTemplateId, String fileId) { - List images = getImageBasedMigrationEntities(redactionLog, document, dossierTemplateId); - List textMigrationEntities = getTextBasedMigrationEntities(redactionLog, document, dossierTemplateId); + List images = getImageBasedMigrationEntities(redactionLog, document, fileId); + List textMigrationEntities = getTextBasedMigrationEntities(redactionLog, document, dossierTemplateId, fileId); return Stream.of(textMigrationEntities.stream(), images.stream()) .flatMap(Function.identity()) .toList(); @@ -163,7 +156,7 @@ public class RedactionLogToEntityLogMigrationService { } - private List getImageBasedMigrationEntities(RedactionLog redactionLog, Document document, String dossierTemplateId) { + private List getImageBasedMigrationEntities(RedactionLog redactionLog, Document document, String fileId) { List images = document.streamAllImages() .collect(Collectors.toList()); @@ -195,7 +188,8 @@ public class RedactionLogToEntityLogMigrationService { } String ruleIdentifier; - String reason = Optional.ofNullable(redactionLogImage.getReason()).orElse(""); + String reason = Optional.ofNullable(redactionLogImage.getReason()) + .orElse(""); if (redactionLogImage.getMatchedRule().isBlank() || redactionLogImage.getMatchedRule() == null) { ruleIdentifier = "OLDIMG.0.0"; } else { @@ -209,7 +203,7 @@ public class RedactionLogToEntityLogMigrationService { } else { closestImage.skip(ruleIdentifier, reason); } - migrationEntities.add(new MigrationEntity(null, redactionLogImage, closestImage, redactionLogImage.getId(), closestImage.getId())); + migrationEntities.add(MigrationEntity.fromRedactionLogImage(redactionLogImage, closestImage, fileId)); } return migrationEntities; } @@ -250,40 +244,21 @@ public class 
RedactionLogToEntityLogMigrationService { } - private List getTextBasedMigrationEntities(RedactionLog redactionLog, Document document, String dossierTemplateId) { + private List getTextBasedMigrationEntities(RedactionLog redactionLog, Document document, String dossierTemplateId, String fileId) { List entitiesToMigrate = redactionLog.getRedactionLogEntry() .stream() .filter(redactionLogEntry -> !redactionLogEntry.isImage()) - .map(entry -> MigrationEntity.fromRedactionLogEntry(entry, dictionaryService.isHint(entry.getType(), dossierTemplateId))) - .peek(migrationEntity -> { - if (migrationEntity.getPrecursorEntity().getEntityType().equals(EntityType.HINT) &&// - !migrationEntity.getRedactionLogEntry().isHint() &&// - !migrationEntity.getRedactionLogEntry().isRedacted()) { - migrationEntity.getPrecursorEntity().ignore(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (migrationEntity.getRedactionLogEntry().lastChangeIsRemoved()) { - migrationEntity.getPrecursorEntity().remove(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (lastManualChangeIsRemove(migrationEntity)) { - migrationEntity.getPrecursorEntity().ignore(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (migrationEntity.getPrecursorEntity().isApplied() && migrationEntity.getRedactionLogEntry().isRecommendation()) { - migrationEntity.getPrecursorEntity() - .skip(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (migrationEntity.getPrecursorEntity().isApplied()) { - migrationEntity.getPrecursorEntity() - .apply(migrationEntity.getPrecursorEntity().getRuleIdentifier(), - migrationEntity.getPrecursorEntity().getReason(), - migrationEntity.getPrecursorEntity().getLegalBasis()); - } else { - migrationEntity.getPrecursorEntity() - 
.skip(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } - }) + .filter(redactionLogEntry -> isOnExistingPage(redactionLogEntry, document.getNumberOfPages())) + .map(entry -> MigrationEntity.fromRedactionLogEntry(entry, dictionaryService.isHint(entry.getType(), dossierTemplateId), fileId)) .toList(); - Map> tempEntitiesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(document, - entitiesToMigrate.stream() - .map(MigrationEntity::getPrecursorEntity) - .toList()); + List precursorEntities = entitiesToMigrate.stream() + .map(MigrationEntity::getPrecursorEntity) + .toList(); + + log.info("Finding all possible entities"); + Map> tempEntitiesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(document, precursorEntities); for (MigrationEntity migrationEntity : entitiesToMigrate) { Optional optionalTextEntity = entityFindingUtility.findClosestEntityAndReturnEmptyIfNotFound(migrationEntity.getPrecursorEntity(), @@ -297,45 +272,33 @@ public class RedactionLogToEntityLogMigrationService { continue; } - TextEntity entity = createCorrectEntity(migrationEntity.getPrecursorEntity(), document, optionalTextEntity.get().getTextRange()); - migrationEntity.setMigratedEntity(entity); - migrationEntity.setOldId(migrationEntity.getPrecursorEntity().getId()); - migrationEntity.setNewId(entity.getId()); // Can only be on one page, since redactionLogEntries can only be on one page + TextEntity migratedEntity = EntityFromPrecursorCreationService.createCorrectEntity(migrationEntity.getPrecursorEntity(), optionalTextEntity.get(), true); + migrationEntity.setMigratedEntity(migratedEntity); + migrationEntity.setOldId(migrationEntity.getPrecursorEntity().getId()); + migrationEntity.setNewId(migratedEntity.getId()); } tempEntitiesByValue.values() .stream() .flatMap(Collection::stream) .forEach(TextEntity::removeFromGraph); + return entitiesToMigrate; } - private static boolean 
lastManualChangeIsRemove(MigrationEntity migrationEntity) { - if (migrationEntity.getRedactionLogEntry().getManualChanges() == null) { - return false; + private boolean isOnExistingPage(RedactionLogEntry redactionLogEntry, int numberOfPages){ + var pages = redactionLogEntry.getPositions().stream().map(Rectangle::getPage).collect(Collectors.toSet()); + + for (int page: pages){ + if(page > numberOfPages){ + return false; + } } - - return migrationEntity.getRedactionLogEntry().getManualChanges() - .stream() - .reduce((a, b) -> b) - .map(m -> m.getManualRedactionType().equals(ManualRedactionType.REMOVE_LOCALLY)) - .orElse(false); + return true; } - private TextEntity createCorrectEntity(PrecursorEntity precursorEntity, SemanticNode node, TextRange closestTextRange) { - - EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - TextEntity correctEntity = entityCreationService.forceByTextRange(closestTextRange, precursorEntity.getType(), precursorEntity.getEntityType(), node); - - correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); - correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); - correctEntity.setDossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()); - correctEntity.getManualOverwrite().addChanges(precursorEntity.getManualOverwrite().getManualChangeLog()); - return correctEntity; - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java index aff967e5..31a9e30d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java @@ 
-1,7 +1,10 @@ package com.iqser.red.service.redaction.v1.server.model; +import java.util.List; + import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import lombok.AllArgsConstructor; import lombok.Builder; @@ -16,5 +19,4 @@ public class MigratedEntityLog { MigratedIds migratedIds; EntityLog entityLog; - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java index bd948b08..5d14ad0f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java @@ -1,34 +1,47 @@ package com.iqser.red.service.redaction.v1.server.model; +import static com.iqser.red.service.redaction.v1.server.service.EntityLogCreatorService.buildEntryState; +import static com.iqser.red.service.redaction.v1.server.service.EntityLogCreatorService.buildEntryType; + +import java.awt.geom.Rectangle2D; +import java.time.OffsetDateTime; import java.util.Collections; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Change; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntryType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.server.migration.MigrationMapper; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.service.ManualChangeFactory; +import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; 
import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Data; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +@Slf4j @Data +@Builder @AllArgsConstructor @RequiredArgsConstructor public final class MigrationEntity { @@ -38,28 +51,73 @@ public final class MigrationEntity { private IEntity migratedEntity; private String oldId; private String newId; + private String fileId; + + @Builder.Default + List manualChanges = new LinkedList<>(); - public static MigrationEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry, boolean hint) { + public static MigrationEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry, boolean hint, String fileId) { - return new MigrationEntity(createPrecursorEntity(redactionLogEntry, hint), redactionLogEntry); + PrecursorEntity precursorEntity = createPrecursorEntity(redactionLogEntry, hint); + + if (precursorEntity.getEntityType().equals(EntityType.HINT) && !redactionLogEntry.isHint() && !redactionLogEntry.isRedacted()) { + precursorEntity.ignore(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (redactionLogEntry.lastChangeIsRemoved()) { + precursorEntity.remove(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (lastManualChangeIsRemove(redactionLogEntry)) { + precursorEntity.ignore(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (precursorEntity.isApplied() && redactionLogEntry.isRecommendation()) { + precursorEntity.skip(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (precursorEntity.isApplied()) { + precursorEntity.apply(precursorEntity.getRuleIdentifier(), precursorEntity.getReason(), precursorEntity.getLegalBasis()); + } else { + precursorEntity.skip(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } + + return 
MigrationEntity.builder().precursorEntity(precursorEntity).redactionLogEntry(redactionLogEntry).oldId(redactionLogEntry.getId()).fileId(fileId).build(); + } + + + public static MigrationEntity fromRedactionLogImage(RedactionLogEntry redactionLogImage, Image image, String fileId) { + + return MigrationEntity.builder().redactionLogEntry(redactionLogImage).migratedEntity(image).oldId(redactionLogImage.getId()).newId(image.getId()).fileId(fileId).build(); + } + + + private static boolean lastManualChangeIsRemove(RedactionLogEntry redactionLogEntry) { + + if (redactionLogEntry.getManualChanges() == null) { + return false; + } + + return redactionLogEntry.getManualChanges() + .stream() + .reduce((a, b) -> b) + .map(m -> m.getManualRedactionType().equals(ManualRedactionType.REMOVE_LOCALLY)) + .orElse(false); } public static PrecursorEntity createPrecursorEntity(RedactionLogEntry redactionLogEntry, boolean hint) { String ruleIdentifier = buildRuleIdentifier(redactionLogEntry); - List rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); + List rectangleWithPages = redactionLogEntry.getPositions() + .stream() + .map(RectangleWithPage::fromRedactionLogRectangle) + .toList(); EntityType entityType = getEntityType(redactionLogEntry, hint); return PrecursorEntity.builder() .id(redactionLogEntry.getId()) .value(redactionLogEntry.getValue()) .entityPosition(rectangleWithPages) .ruleIdentifier(ruleIdentifier) - .reason(Optional.ofNullable(redactionLogEntry.getReason()).orElse("")) + .reason(Optional.ofNullable(redactionLogEntry.getReason()) + .orElse("")) .legalBasis(redactionLogEntry.getLegalBasis()) .type(redactionLogEntry.getType()) .section(redactionLogEntry.getSection()) + .engines(MigrationMapper.getMigratedEngines(redactionLogEntry)) .entityType(entityType) .applied(redactionLogEntry.isRedacted()) .isDictionaryEntry(redactionLogEntry.isDictionaryEntry()) @@ -100,14 +158,6 @@ public final class 
MigrationEntity { } - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change toEntityLogChanges(Change change) { - - return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change(change.getAnalysisNumber(), - toEntityLogType(change.getType()), - change.getDateTime()); - } - - private static EntryType getEntryType(EntityType entityType) { return switch (entityType) { @@ -120,42 +170,6 @@ public final class MigrationEntity { } - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange toEntityLogManualChanges(ManualChange manualChange) { - - return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange(toManualRedactionType(manualChange.getManualRedactionType()), - manualChange.getProcessedDate(), - manualChange.getRequestedDate(), - manualChange.getUserId(), - manualChange.getPropertyChanges()); - } - - - private static ChangeType toEntityLogType(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType type) { - - return switch (type) { - case ADDED -> ChangeType.ADDED; - case REMOVED -> ChangeType.REMOVED; - case CHANGED -> ChangeType.CHANGED; - }; - } - - - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType toManualRedactionType(ManualRedactionType manualRedactionType) { - - return switch (manualRedactionType) { - case ADD_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_LOCALLY; - case ADD_TO_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_TO_DICTIONARY; - case REMOVE_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_LOCALLY; - case REMOVE_FROM_DICTIONARY -> 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_FROM_DICTIONARY; - case FORCE_REDACT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_REDACT; - case FORCE_HINT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_HINT; - case RECATEGORIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RECATEGORIZE; - case LEGAL_BASIS_CHANGE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.LEGAL_BASIS_CHANGE; - case RESIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RESIZE; - }; - } - - public EntityLogEntry toEntityLogEntry(Map oldToNewIdMapping) { EntityLogEntry entityLogEntry; @@ -171,10 +185,13 @@ public final class MigrationEntity { entityLogEntry.setManualChanges(ManualChangeFactory.toManualChangeList(migratedEntity.getManualOverwrite().getManualChangeLog(), redactionLogEntry.isHint())); entityLogEntry.setColor(redactionLogEntry.getColor()); - entityLogEntry.setChanges(redactionLogEntry.getChanges().stream().map(MigrationEntity::toEntityLogChanges).toList()); + entityLogEntry.setChanges(redactionLogEntry.getChanges() + .stream() + .map(MigrationMapper::toEntityLogChanges) + .toList()); entityLogEntry.setReference(migrateSetOfIds(redactionLogEntry.getReference(), oldToNewIdMapping)); entityLogEntry.setImportedRedactionIntersections(migrateSetOfIds(redactionLogEntry.getImportedRedactionIntersections(), oldToNewIdMapping)); - entityLogEntry.setEngines(getMigratedEngines(redactionLogEntry)); + entityLogEntry.setEngines(MigrationMapper.getMigratedEngines(redactionLogEntry)); if (redactionLogEntry.getLegalBasis() != null) { entityLogEntry.setLegalBasis(redactionLogEntry.getLegalBasis()); } @@ -192,53 +209,26 @@ public 
final class MigrationEntity { return entityLogEntry.getManualChanges() .stream() .reduce((a, b) -> b) - .filter(mc -> mc.getManualRedactionType() - .equals(com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_LOCALLY)) + .filter(mc -> mc.getManualRedactionType().equals(com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE)) .isPresent(); } - private List migrateManualChanges(List manualChanges) { - - if (manualChanges == null) { - return Collections.emptyList(); - } - return manualChanges.stream().map(MigrationEntity::toEntityLogManualChanges).toList(); - } - - - private static Set getMigratedEngines(RedactionLogEntry entry) { - - if (entry.getEngines() == null) { - return Collections.emptySet(); - } - return entry.getEngines().stream().map(MigrationEntity::toEntityLogEngine).collect(Collectors.toSet()); - } - - private Set migrateSetOfIds(Set ids, Map oldToNewIdMapping) { if (ids == null) { return Collections.emptySet(); } - return ids.stream().map(oldToNewIdMapping::get).collect(Collectors.toSet()); - } - - - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine toEntityLogEngine(Engine engine) { - - return switch (engine) { - case DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.DICTIONARY; - case NER -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.NER; - case RULE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.RULE; - }; + return ids.stream() + .map(oldToNewIdMapping::get) + .collect(Collectors.toSet()); } public EntityLogEntry createEntityLogEntry(Image image) { List positions = getPositionsFromOverride(image).orElse(List.of(new Position(image.getPosition(), image.getPage().getNumber()))); - return EntityLogEntry.builder() + return 
EntityLogEntry.builder() .id(image.getId()) .value(image.value()) .type(image.type()) @@ -249,7 +239,8 @@ public final class MigrationEntity { .positions(positions) .containingNodeId(image.getTreeId()) .closestHeadline(image.getHeadline().getTextBlock().getSearchText()) - .section(redactionLogEntry.getSection()) + .section(image.getManualOverwrite().getSection() + .orElse(redactionLogEntry.getSection())) .textAfter(redactionLogEntry.getTextAfter()) .textBefore(redactionLogEntry.getTextBefore()) .imageHasTransparency(image.isTransparent()) @@ -270,7 +261,8 @@ public final class MigrationEntity { .type(precursorEntity.type()) .state(buildEntryState(precursorEntity)) .entryType(buildEntryType(precursorEntity)) - .section(redactionLogEntry.getSection()) + .section(precursorEntity.getManualOverwrite().getSection() + .orElse(redactionLogEntry.getSection())) .textAfter(redactionLogEntry.getTextAfter()) .textBefore(redactionLogEntry.getTextBefore()) .containingNodeId(Collections.emptyList()) @@ -280,12 +272,11 @@ public final class MigrationEntity { .dossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()) .startOffset(-1) .endOffset(-1) - .positions(precursorEntity.getManualOverwrite() - .getPositions() - .orElse(precursorEntity.getEntityPosition()) - .stream() - .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) - .toList()) + .positions(precursorEntity.getManualOverwrite().getPositions() + .orElse(precursorEntity.getEntityPosition()) + .stream() + .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) + .toList()) .engines(Collections.emptySet()) .build(); } @@ -300,11 +291,13 @@ public final class MigrationEntity { .positions(rectanglesPerLine) .reason(entity.buildReasonWithManualChangeDescriptions()) .legalBasis(entity.legalBasis()) - .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? 
entity.getValueWithLineBreaks() : entity.getValue())) + .value(entity.getManualOverwrite().getValue() + .orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) .type(entity.type()) - .section(redactionLogEntry.getSection()) - .textAfter(redactionLogEntry.getTextAfter()) - .textBefore(redactionLogEntry.getTextBefore()) + .section(entity.getManualOverwrite().getSection() + .orElse(redactionLogEntry.getSection())) + .textAfter(entity.getTextAfter()) + .textBefore(entity.getTextBefore()) .containingNodeId(entity.getDeepestFullyContainingNode().getTreeId()) .closestHeadline(entity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()) .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString()) @@ -322,54 +315,126 @@ public final class MigrationEntity { private static List getRectanglesPerLine(TextEntity entity) { return getPositionsFromOverride(entity).orElse(entity.getPositionsOnPagePerPage() - .get(0) - .getRectanglePerLine() - .stream() - .map(rectangle2D -> new Position(rectangle2D, entity.getPositionsOnPagePerPage().get(0).getPage().getNumber())) - .toList()); + .get(0).getRectanglePerLine() + .stream() + .map(rectangle2D -> new Position(rectangle2D, + entity.getPositionsOnPagePerPage() + .get(0).getPage().getNumber())) + .toList()); } private static Optional> getPositionsFromOverride(IEntity entity) { - return entity.getManualOverwrite().getPositions().map(rects -> rects.stream().map(r -> new Position(r.rectangle2D(), r.pageNumber())).toList()); - } - - - private EntryState buildEntryState(IEntity entity) { - - if (entity.applied() && entity.active()) { - return EntryState.APPLIED; - } else if (entity.skipped() && entity.active()) { - return EntryState.SKIPPED; - } else if (entity.ignored()) { - return EntryState.IGNORED; - } else { - return EntryState.REMOVED; - } - } - - - private EntryType buildEntryType(IEntity entity) { - - if (entity instanceof TextEntity textEntity) 
{ - return getEntryType(textEntity.getEntityType()); - } else if (entity instanceof PrecursorEntity precursorEntity) { - if (precursorEntity.isRectangle()) { - return EntryType.AREA; - } - return getEntryType(precursorEntity.getEntityType()); - } else if (entity instanceof Image) { - return EntryType.IMAGE; - } - throw new UnsupportedOperationException(String.format("Entity subclass %s is not implemented!", entity.getClass())); + return entity.getManualOverwrite().getPositions() + .map(rects -> rects.stream() + .map(r -> new Position(r.rectangle2D(), r.pageNumber())) + .toList()); } public boolean hasManualChangesOrComments() { return !(redactionLogEntry.getManualChanges() == null || redactionLogEntry.getManualChanges().isEmpty()) || // - !(redactionLogEntry.getComments() == null || redactionLogEntry.getComments().isEmpty()); + !(redactionLogEntry.getComments() == null || redactionLogEntry.getComments().isEmpty()) // + || hasManualChanges(); + } + + + public boolean hasManualChanges() { + + return !manualChanges.isEmpty(); + } + + + public void applyManualChanges(List manualChangesToApply, ManualChangesApplicationService manualChangesApplicationService) { + + manualChanges.addAll(manualChangesToApply); + manualChangesToApply.forEach(manualChange -> { + if (manualChange instanceof ManualResizeRedaction manualResizeRedaction && migratedEntity instanceof TextEntity textEntity) { + manualResizeRedaction.setAnnotationId(newId); + manualChangesApplicationService.resize(textEntity, manualResizeRedaction); + } else { + migratedEntity.getManualOverwrite().addChange(manualChange); + } + }); + } + + + public ManualRedactionEntry buildManualRedactionEntry() { + + assert hasManualChanges(); + + // currently we need to insert a manual redaction entry, whenever an entity has been resized. 
+ String user = manualChanges.stream() + .filter(mc -> mc instanceof ManualResizeRedaction) + .findFirst() + .orElse(manualChanges.get(0)).getUser(); + + OffsetDateTime requestDate = manualChanges.get(0).getRequestDate(); + + return ManualRedactionEntry.builder() + .annotationId(newId) + .fileId(fileId) + .user(user) + .requestDate(requestDate) + .type(redactionLogEntry.getType()) + .value(redactionLogEntry.getValue()) + .reason(redactionLogEntry.getReason()) + .legalBasis(redactionLogEntry.getLegalBasis()) + .section(redactionLogEntry.getSection()) + .rectangle(false) + .addToDictionary(false) + .addToDossierDictionary(false) + .positions(buildPositions(migratedEntity)) + .textAfter(redactionLogEntry.getTextAfter()) + .textBefore(redactionLogEntry.getTextBefore()) + .dictionaryEntryType(DictionaryEntryType.ENTRY) + .build(); + } + + + private List buildPositions(IEntity entity) { + + if (entity instanceof TextEntity textEntity) { + + var positionsOnPage = textEntity.getPositionsOnPagePerPage() + .get(0); + return positionsOnPage.getRectanglePerLine() + .stream() + .map(p -> new Rectangle((float) p.getX(), (float) p.getY(), (float) p.getWidth(), (float) p.getHeight(), positionsOnPage.getPage().getNumber())) + .toList(); + } + if (entity instanceof PrecursorEntity pEntity) { + + return pEntity.getManualOverwrite().getPositions() + .orElse(pEntity.getEntityPosition()) + .stream() + .map(p -> new Rectangle((float) p.rectangle2D().getX(), + (float) p.rectangle2D().getY(), + (float) p.rectangle2D().getWidth(), + (float) p.rectangle2D().getHeight(), + p.pageNumber())) + .toList(); + } + if (entity instanceof Image image) { + + Rectangle2D position = image.getManualOverwrite().getPositions() + .map(p -> p.get(0).rectangle2D()) + .orElse(image.getPosition()); + + return List.of(new Rectangle((float) position.getX(), (float) position.getY(), (float) position.getWidth(), (float) position.getHeight(), image.getPage().getNumber())); + + } else { + throw new 
UnsupportedOperationException(); + } + } + + + public boolean needsManualEntry() { + + return manualChanges.stream() + .anyMatch(mc -> mc instanceof ManualResizeRedaction && !((ManualResizeRedaction) mc).getUpdateDictionary()) && !(migratedEntity instanceof Image); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java index 22416c0e..3058c5d7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java @@ -43,7 +43,6 @@ public class PrecursorEntity implements IEntity { String type; String section; EntityType entityType; - EntryType entryType; boolean applied; boolean isDictionaryEntry; boolean isDossierDictionaryEntry; @@ -61,8 +60,8 @@ public class PrecursorEntity implements IEntity { .stream() .map(RectangleWithPage::fromAnnotationRectangle) .toList(); + var entityType = hint ? EntityType.HINT : EntityType.ENTITY; - var entryType = hint ? EntryType.HINT : (manualRedactionEntry.isRectangle() ? 
EntryType.AREA : EntryType.ENTITY); ManualChangeOverwrite manualChangeOverwrite = new ManualChangeOverwrite(entityType); manualChangeOverwrite.addChange(manualRedactionEntry); return PrecursorEntity.builder() @@ -75,7 +74,6 @@ public class PrecursorEntity implements IEntity { .type(manualRedactionEntry.getType()) .section(manualRedactionEntry.getSection()) .entityType(entityType) - .entryType(entryType) .applied(true) .isDictionaryEntry(false) .isDossierDictionaryEntry(false) @@ -103,7 +101,6 @@ public class PrecursorEntity implements IEntity { .type(entityLogEntry.getType()) .section(entityLogEntry.getSection()) .entityType(entityType) - .entryType(entityLogEntry.getEntryType()) .isDictionaryEntry(entityLogEntry.isDictionaryEntry()) .isDossierDictionaryEntry(entityLogEntry.isDossierDictionaryEntry()) .manualOverwrite(new ManualChangeOverwrite(entityType)) @@ -134,7 +131,6 @@ public class PrecursorEntity implements IEntity { .type(Optional.ofNullable(importedRedaction.getType()) .orElse(IMPORTED_REDACTION_TYPE)) .entityType(entityType) - .entryType(entryType) .isDictionaryEntry(false) .isDossierDictionaryEntry(false) .rectangle(value.isBlank() || entryType.equals(EntryType.IMAGE) || entryType.equals(EntryType.IMAGE_HINT) || entryType.equals(EntryType.AREA)) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java index e591af11..0a3243fb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java @@ -70,7 +70,9 @@ public class Image implements GenericSemanticNode, IEntity { @Override public TextBlock 
getTextBlock() { - return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector()); + return streamAllSubNodes().filter(SemanticNode::isLeaf) + .map(SemanticNode::getLeafTextBlock) + .collect(new TextBlockCollector()); } @@ -91,7 +93,8 @@ public class Image implements GenericSemanticNode, IEntity { @Override public String type() { - return getManualOverwrite().getType().orElse(imageType.toString()); + return getManualOverwrite().getType() + .orElse(imageType.toString().toLowerCase(Locale.ENGLISH)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java index 100f3fe9..4d56c729 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -64,8 +65,7 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsStringsIgnoreCase(List strings) { - return IntStream.range(0, numberOfRows) - .boxed() + return IntStream.range(0, numberOfRows).boxed() .filter(row -> rowContainsStringsIgnoreCase(row, strings)) .flatMap(this::streamRow) .map(TableCell::getEntities) @@ -82,8 +82,11 @@ public class Table implements SemanticNode { */ public boolean rowContainsStringsIgnoreCase(Integer row, List strings) { - String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new 
TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT); - return strings.stream().map(String::toLowerCase).allMatch(rowText::contains); + String rowText = streamRow(row).map(TableCell::getTextBlock) + .collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT); + return strings.stream() + .map(String::toLowerCase) + .allMatch(rowText::contains); } @@ -96,9 +99,13 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowHasHeaderAndValue(String header, String value) { - List vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); + List vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)) + .map(TableCell::getCol) + .toList(); return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream() - .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))).map(TableCell::getEntities).flatMap(Collection::stream); + .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -111,9 +118,13 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List values) { - List colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); + List colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)) + .map(TableCell::getCol) + .toList(); return streamTableCells().filter(tableCellNode -> colsWithHeader.stream() - .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))).map(TableCell::getEntities).flatMap(Collection::stream); + .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))) + 
.map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -126,16 +137,33 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsEntitiesOfType(List types) { - List rowsWithEntityOfType = getEntities().stream() - .filter(TextEntity::active) - .filter(redactionEntity -> types.stream().anyMatch(type -> type.equals(redactionEntity.type()))) - .map(TextEntity::getIntersectingNodes) - .filter(node -> node instanceof TableCell) - .map(node -> (TableCell) node) - .map(TableCell::getRow) - .toList(); + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .anyMatch(types::contains)) + .flatMap(this::streamRow) + .map(TableCell::getEntities) + .flatMap(Collection::stream); + } - return rowsWithEntityOfType.stream().flatMap(this::streamRow).map(TableCell::getEntities).flatMap(Collection::stream); + + /** + * Streams all entities in this table, that appear in a row, which contains at least one entity of each of the provided types. + * Ignores Entity with ignored == true or removed == true. + * + * @param types type strings to check whether a row contains an entity like them + * @return Stream of all entities in this table, that appear in a row, which contains at least one entity of each of the provided types. 
+ */ + public Stream streamEntitiesWhereRowContainsEntitiesOfEachType(List types) { + + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> { + Set entityTypes = streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .collect(Collectors.toSet()); + return entityTypes.containsAll(types); + }) + .flatMap(this::streamRow) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -148,18 +176,43 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsNoEntitiesOfType(List types) { - return IntStream.range(0, numberOfRows) - .boxed() - .filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities) - .flatMap(Collection::stream) - .filter(TextEntity::active) - .noneMatch(entity -> types.contains(entity.type()))) + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .noneMatch(types::contains)) .flatMap(this::streamRow) .map(TableCell::getEntities) .flatMap(Collection::stream); } + /** + * Streams all Entities in the given row. + * + * @param rowNumber the row number to look for + * @return stream of TextEntities occurring in row + */ + public Stream streamTextEntitiesInRow(int rowNumber) { + + return streamRow(rowNumber).map(TableCell::getEntities) + .flatMap(Collection::stream) + .filter(TextEntity::active); + } + + + /** + * Streams all Entities in the given col. + * + * @param colNumber the column number to look for + * @return stream of TextEntities occurring in column + */ + public Stream streamTextEntitiesInCol(int colNumber) { + + return streamCol(colNumber).map(TableCell::getEntities) + .flatMap(Collection::stream) + .filter(TextEntity::active); + } + + /** * Returns a TableCell at the provided row and column location.
* @@ -173,7 +226,8 @@ public class Table implements SemanticNode { throw new IllegalArgumentException(format("row %d, col %d is out of bounds for number of rows of %d and number of cols %d", row, col, numberOfRows, numberOfCols)); } int idx = row * numberOfCols + col; - return (TableCell) documentTree.getEntryById(treeId).getChildren().get(idx).getNode(); + return (TableCell) documentTree.getEntryById(treeId).getChildren() + .get(idx).getNode(); } @@ -196,7 +250,7 @@ public class Table implements SemanticNode { */ public Stream streamTableCellsWhichContainType(String type) { - return streamTableCells().filter(tableCell -> tableCell.getEntities().stream().filter(TextEntity::active).anyMatch(entity -> entity.type().equals(type))); + return streamTableCells().filter(tableCell -> tableCell.hasEntitiesOfType(type)); } @@ -222,7 +276,8 @@ public class Table implements SemanticNode { */ public Stream streamCol(int col) { - return IntStream.range(0, numberOfRows).boxed().map(row -> getCell(row, col)); + return IntStream.range(0, numberOfRows).boxed() + .map(row -> getCell(row, col)); } @@ -234,7 +289,8 @@ public class Table implements SemanticNode { */ public Stream streamRow(int row) { - return IntStream.range(0, numberOfCols).boxed().map(col -> getCell(row, col)); + return IntStream.range(0, numberOfCols).boxed() + .map(col -> getCell(row, col)); } @@ -258,7 +314,8 @@ public class Table implements SemanticNode { */ public Stream streamHeadersForCell(int row, int col) { - return Stream.concat(streamRow(row), streamCol(col)).filter(TableCell::isHeader); + return Stream.concat(streamRow(row), streamCol(col)) + .filter(TableCell::isHeader); } @@ -348,7 +405,9 @@ public class Table implements SemanticNode { public TextBlock getTextBlock() { if (textBlock == null) { - textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector()); + textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf) + 
.map(SemanticNode::getLeafTextBlock) + .collect(new TextBlockCollector()); } return textBlock; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java index 2d50d3f6..d7b7dc80 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.server.service; import java.time.OffsetDateTime; -import java.util.Comparator; import java.util.List; import java.util.Optional; import java.util.Set; @@ -13,7 +12,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; @@ -26,10 +24,9 @@ import lombok.extern.slf4j.Slf4j; @Slf4j @Service @RequiredArgsConstructor -@FieldDefaults(makeFinal=true, level= AccessLevel.PRIVATE) +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public class EntityChangeLogService { - @Timed("redactmanager_computeChanges") public boolean computeChanges(List 
previousEntityLogEntries, List newEntityLogEntries, ManualRedactions manualRedactions, int analysisNumber) { @@ -42,7 +39,9 @@ public class EntityChangeLogService { boolean hasChanges = false; for (EntityLogEntry entityLogEntry : newEntityLogEntries) { - Optional optionalPreviousEntity = previousEntityLogEntries.stream().filter(entry -> entry.getId().equals(entityLogEntry.getId())).findAny(); + Optional optionalPreviousEntity = previousEntityLogEntries.stream() + .filter(entry -> entry.getId().equals(entityLogEntry.getId())) + .findAny(); if (optionalPreviousEntity.isEmpty()) { hasChanges = true; entityLogEntry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now)); @@ -56,65 +55,30 @@ public class EntityChangeLogService { ChangeType changeType = calculateChangeType(entityLogEntry.getState(), previousEntity.getState()); entityLogEntry.getChanges().add(new Change(analysisNumber, changeType, now)); } - - addManualChanges(entityLogEntry, previousEntity); } addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, manualRedactions, analysisNumber, now); return hasChanges; } - // If a manual change is present in the previous entity but not in the new entity, add it to the new one and - // sort them, so they are displayed in the correct order. 
- private void addManualChanges(EntityLogEntry entityLogEntry, EntityLogEntry previousEntity) { - - Comparator manualChangeComparator = - Comparator.comparing(ManualChange::getManualRedactionType) - .thenComparing(ManualChange::getRequestedDate); - - previousEntity.getManualChanges().forEach(manualChange -> { - boolean contains = entityLogEntry.getManualChanges() - .stream() - .anyMatch(existingChange -> manualChangeComparator.compare(existingChange, manualChange) == 0); - - if (!contains) { - entityLogEntry.getManualChanges().add(manualChange); - entityLogEntry.getManualChanges().sort(Comparator.comparing(ManualChange::getRequestedDate)); - } - }); - } - - private void addRemovedEntriesAsRemoved(List previousEntityLogEntries, - List newEntityLogEntries, - ManualRedactions manualRedactions, - int analysisNumber, - OffsetDateTime now) { + List newEntityLogEntries, + ManualRedactions manualRedactions, + int analysisNumber, + OffsetDateTime now) { - Set existingIds = newEntityLogEntries.stream().map(EntityLogEntry::getId).collect(Collectors.toSet()); + Set existingIds = newEntityLogEntries.stream() + .map(EntityLogEntry::getId) + .collect(Collectors.toSet()); List removedEntries = previousEntityLogEntries.stream() .filter(entry -> !existingIds.contains(entry.getId())) .toList(); removedEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, now))); removedEntries.forEach(entry -> entry.setState(EntryState.REMOVED)); - removedEntries.forEach(entry -> addManualChangeForDictionaryRemovals(entry, manualRedactions)); newEntityLogEntries.addAll(removedEntries); } - private void addManualChangeForDictionaryRemovals(EntityLogEntry entry, ManualRedactions manualRedactions) { - - if (manualRedactions == null || manualRedactions.getIdsToRemove().isEmpty()) { - return; - } - - manualRedactions.getIdsToRemove().stream() - .filter(IdRemoval::isRemoveFromDictionary)// - .filter(removed -> removed.getAnnotationId().equals(entry.getId()))// - 
.findFirst()// - .ifPresent(idRemove -> entry.getManualChanges().add(ManualChangeFactory.toManualChange(idRemove, false))); - } - private ChangeType calculateChangeType(EntryState state, EntryState previousState) { if (state.equals(previousState)) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index f630a968..2f03ef2c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -11,6 +11,7 @@ import java.util.stream.Collectors; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; @@ -18,7 +19,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; -import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.legalbasis.LegalBasis; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; @@ -26,6 +26,7 @@ import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; @@ -66,20 +67,19 @@ public class EntityLogCreatorService { List entityLogEntries = createEntityLogEntries(document, analyzeRequest, notFoundEntities); List legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); - EntityLog entityLog = new EntityLog(redactionServiceSettings.getAnalysisVersion(), - analyzeRequest.getAnalysisNumber(), - entityLogEntries, - toEntityLogLegalBasis(legalBasis), - dictionaryVersion.getDossierTemplateVersion(), - dictionaryVersion.getDossierVersion(), - rulesVersion, - legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); List previousExistingEntityLogEntries = getPreviousEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getManualRedactions(), analyzeRequest.getAnalysisNumber()); - return entityLog; + return new 
EntityLog(redactionServiceSettings.getAnalysisVersion(), + analyzeRequest.getAnalysisNumber(), + entityLogEntries, + toEntityLogLegalBasis(legalBasis), + dictionaryVersion.getDossierTemplateVersion(), + dictionaryVersion.getDossierVersion(), + rulesVersion, + legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); } @@ -114,21 +114,24 @@ public class EntityLogCreatorService { DictionaryVersion dictionaryVersion) { List newEntityLogEntries = createEntityLogEntries(document, analyzeRequest, notFoundEntries).stream() - .filter(entry -> entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId().get(0))) + .filter(entry -> entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId() + .get(0))) .collect(Collectors.toList()); - Set newEntityIds = newEntityLogEntries.stream().map(EntityLogEntry::getId).collect(Collectors.toSet()); + Set newEntityIds = newEntityLogEntries.stream() + .map(EntityLogEntry::getId) + .collect(Collectors.toSet()); List previousEntriesFromReAnalyzedSections = previousEntityLog.getEntityLogEntry() .stream() .filter(entry -> (newEntityIds.contains(entry.getId()) || entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId() - .get(0)))) - .toList(); + .get(0)))) + .collect(Collectors.toList()); previousEntityLog.getEntityLogEntry().removeAll(previousEntriesFromReAnalyzedSections); boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, - newEntityLogEntries, - analyzeRequest.getManualRedactions(), - analyzeRequest.getAnalysisNumber()); + newEntityLogEntries, + analyzeRequest.getManualRedactions(), + analyzeRequest.getAnalysisNumber()); previousEntityLog.getEntityLogEntry().addAll(newEntityLogEntries); return updateVersionsAndReturnChanges(previousEntityLog, dictionaryVersion, analyzeRequest, hasChanges); @@ -137,22 +140,6 @@ public class EntityLogCreatorService { private 
List createEntityLogEntries(Document document, AnalyzeRequest analyzeRequest, List notFoundPrecursorEntries) { - Set dictionaryEntries; - Set dictionaryEntriesValues; - - if (analyzeRequest.getManualRedactions() != null && !analyzeRequest.getManualRedactions().getEntriesToAdd().isEmpty()) { - dictionaryEntries = analyzeRequest.getManualRedactions().getEntriesToAdd() - .stream() - .filter(e -> e.isAddToDictionary() || e.isAddToDossierDictionary()) - .collect(Collectors.toSet()); - dictionaryEntriesValues = dictionaryEntries.stream() - .map(ManualRedactionEntry::getValue) - .collect(Collectors.toSet()); - } else { - dictionaryEntriesValues = new HashSet<>(); - dictionaryEntries = new HashSet<>(); - } - String dossierTemplateId = analyzeRequest.getDossierTemplateId(); List entries = new ArrayList<>(); @@ -162,22 +149,21 @@ public class EntityLogCreatorService { .filter(entity -> !entity.getValue().isEmpty()) .filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendation) .filter(entity -> !entity.removed()) - .forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, dictionaryEntries, dictionaryEntriesValues))); - document.streamAllImages().filter(entity -> !entity.removed()).forEach(imageNode -> entries.add(createEntityLogEntry(imageNode, dossierTemplateId))); - notFoundPrecursorEntries.stream().filter(entity -> !entity.removed()).forEach(precursorEntity -> entries.add(createEntityLogEntry(precursorEntity, dossierTemplateId))); + .forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode))); + document.streamAllImages() + .filter(entity -> !entity.removed()) + .forEach(imageNode -> entries.add(createEntityLogEntry(imageNode, dossierTemplateId))); + notFoundPrecursorEntries.stream() + .filter(entity -> !entity.removed()) + .forEach(precursorEntity -> entries.add(createEntityLogEntry(precursorEntity, dossierTemplateId))); return entries; } - private List toEntityLogEntries(TextEntity textEntity, Set dictionaryEntries, Set 
dictionaryEntriesValues) { + private List toEntityLogEntries(TextEntity textEntity) { List entityLogEntries = new ArrayList<>(); - // Adding ADD_TO_DICTIONARY manual change to the entity's manual overwrite - if (dictionaryEntriesValues.contains(textEntity.getValue())) { - textEntity.getManualOverwrite().addChange(dictionaryEntries.stream().filter(entry -> entry.getValue().equals(textEntity.getValue())).findFirst().get()); - } - // split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) { @@ -204,7 +190,7 @@ public class EntityLogCreatorService { boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId); return EntityLogEntry.builder() .id(image.getId()) - .value(image.value()) + .value(image.getValue()) .type(imageType) .reason(image.buildReasonWithManualChangeDescriptions()) .legalBasis(image.legalBasis()) @@ -213,11 +199,13 @@ public class EntityLogCreatorService { .positions(List.of(new Position(image.getPosition(), image.getPage().getNumber()))) .containingNodeId(image.getTreeId()) .closestHeadline(image.getHeadline().getTextBlock().getSearchText()) - .section(image.getManualOverwrite().getSection().orElse(image.getParent().toString())) + .section(image.getManualOverwrite().getSection() + .orElse(image.getParent().toString())) .imageHasTransparency(image.isTransparent()) .manualChanges(ManualChangeFactory.toManualChangeList(image.getManualOverwrite().getManualChangeLog(), isHint)) .state(buildEntryState(image)) .entryType(isHint ? 
EntryType.IMAGE_HINT : EntryType.IMAGE) + .engines(getEngines(null, image.getManualOverwrite())) .build(); } @@ -225,7 +213,8 @@ public class EntityLogCreatorService { private EntityLogEntry createEntityLogEntry(PrecursorEntity precursorEntity, String dossierTemplateId) { - String type = precursorEntity.getManualOverwrite().getType().orElse(precursorEntity.getType()); + String type = precursorEntity.getManualOverwrite().getType() + .orElse(precursorEntity.getType()); boolean isHint = isHint(precursorEntity.getEntityType()); return EntityLogEntry.builder() .id(precursorEntity.getId()) @@ -235,7 +224,8 @@ public class EntityLogCreatorService { .type(type) .state(buildEntryState(precursorEntity)) .entryType(buildEntryType(precursorEntity)) - .section(precursorEntity.getManualOverwrite().getSection().orElse(precursorEntity.getSection())) + .section(precursorEntity.getManualOverwrite().getSection() + .orElse(precursorEntity.getSection())) .containingNodeId(Collections.emptyList()) .closestHeadline("") .matchedRule(precursorEntity.getMatchedRule().getRuleIdentifier().toString()) @@ -245,13 +235,12 @@ public class EntityLogCreatorService { .textBefore("") .startOffset(-1) .endOffset(-1) - .positions(precursorEntity.getManualOverwrite() - .getPositions() - .orElse(precursorEntity.getEntityPosition()) - .stream() - .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) - .toList()) - .engines(precursorEntity.getEngines()) + .positions(precursorEntity.getManualOverwrite().getPositions() + .orElse(precursorEntity.getEntityPosition()) + .stream() + .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) + .toList()) + .engines(getEngines(precursorEntity.getEngines(), precursorEntity.getManualOverwrite())) //imported is no longer used, frontend should check engines //(was .imported(precursorEntity.getEngines() != null && precursorEntity.getEngines().contains(Engine.IMPORTED))) .imported(false) @@ 
-264,14 +253,20 @@ public class EntityLogCreatorService { private EntityLogEntry createEntityLogEntry(TextEntity entity) { Set referenceIds = new HashSet<>(); - entity.references().stream().filter(TextEntity::active).forEach(ref -> ref.getPositionsOnPagePerPage().forEach(pos -> referenceIds.add(pos.getId()))); + entity.references() + .stream() + .filter(TextEntity::active) + .forEach(ref -> ref.getPositionsOnPagePerPage() + .forEach(pos -> referenceIds.add(pos.getId()))); boolean isHint = isHint(entity.getEntityType()); return EntityLogEntry.builder() .reason(entity.buildReasonWithManualChangeDescriptions()) .legalBasis(entity.legalBasis()) - .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) + .value(entity.getManualOverwrite().getValue() + .orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) .type(entity.type()) - .section(entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString())) + .section(entity.getManualOverwrite().getSection() + .orElse(entity.getDeepestFullyContainingNode().toString())) .containingNodeId(entity.getDeepestFullyContainingNode().getTreeId()) .closestHeadline(entity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()) .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString()) @@ -281,7 +276,7 @@ public class EntityLogCreatorService { .startOffset(entity.getTextRange().start()) .endOffset(entity.getTextRange().end()) .dossierDictionaryEntry(entity.isDossierDictionaryEntry()) - .engines(entity.getEngines() != null ? 
entity.getEngines() : Collections.emptySet()) + .engines(getEngines(entity.getEngines(), entity.getManualOverwrite())) //imported is no longer used, frontend should check engines //(was .imported(entity.getEngines() != null && entity.getEngines().contains(Engine.IMPORTED))) .imported(false) @@ -293,13 +288,24 @@ public class EntityLogCreatorService { } + private Set getEngines(Set currentEngines, ManualChangeOverwrite manualChangeOverwrite) { + + Set engines = currentEngines != null ? new HashSet<>(currentEngines) : new HashSet<>(); + + if (manualChangeOverwrite != null && !manualChangeOverwrite.getManualChangeLog().isEmpty()) { + engines.add(Engine.MANUAL); + } + return engines; + } + + private boolean isHint(EntityType entityType) { return entityType.equals(EntityType.HINT); } - private EntryState buildEntryState(IEntity entity) { + public static EntryState buildEntryState(IEntity entity) { if (entity.applied() && entity.active()) { return EntryState.APPLIED; @@ -313,12 +319,17 @@ public class EntityLogCreatorService { } - private EntryType buildEntryType(IEntity entity) { + public static EntryType buildEntryType(IEntity entity) { if (entity instanceof TextEntity textEntity) { return getEntryType(textEntity.getEntityType()); } else if (entity instanceof PrecursorEntity precursorEntity) { - return precursorEntity.getEntryType(); + if (precursorEntity.isRectangle()) { + return EntryType.AREA; + } + return getEntryType(precursorEntity.getEntityType()); + } else if (entity instanceof Image) { + return EntryType.IMAGE; } throw new UnsupportedOperationException(String.format("Entity subclass %s is not implemented!", entity.getClass())); } @@ -338,7 +349,9 @@ public class EntityLogCreatorService { private List toEntityLogLegalBasis(List legalBasis) { - return legalBasis.stream().map(l -> new EntityLogLegalBasis(l.getName(), l.getDescription(), l.getReason())).collect(Collectors.toList()); + return legalBasis.stream() + .map(l -> new EntityLogLegalBasis(l.getName(), 
l.getDescription(), l.getReason())) + .collect(Collectors.toList()); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java index 0add07c9..3d99a3f9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java @@ -33,13 +33,13 @@ public class ManualChangeFactory { if (baseAnnotation instanceof ManualRecategorization imageRecategorization) { manualChange.withManualRedactionType(ManualRedactionType.RECATEGORIZE).withChange("type", imageRecategorization.getType()); } else if (baseAnnotation instanceof IdRemoval manualRemoval) { - manualChange.withManualRedactionType(manualRemoval.isRemoveFromDictionary() ? ManualRedactionType.REMOVE_FROM_DICTIONARY : ManualRedactionType.REMOVE_LOCALLY); - } else if (baseAnnotation instanceof ManualForceRedaction) { - manualChange.withManualRedactionType(isHint ? ManualRedactionType.FORCE_HINT : ManualRedactionType.FORCE_REDACT); + manualChange.withManualRedactionType(manualRemoval.isRemoveFromDictionary() ? ManualRedactionType.REMOVE_FROM_DICTIONARY : ManualRedactionType.REMOVE); + } else if (baseAnnotation instanceof ManualForceRedaction manualForceRedaction) { + manualChange.withManualRedactionType(ManualRedactionType.FORCE).withChange("legalBasis", manualForceRedaction.getLegalBasis()); } else if (baseAnnotation instanceof ManualResizeRedaction manualResizeRedact) { manualChange.withManualRedactionType(manualResizeRedact.getUpdateDictionary() ? 
ManualRedactionType.RESIZE_IN_DICTIONARY : ManualRedactionType.RESIZE).withChange("value", manualResizeRedact.getValue()); } else if (baseAnnotation instanceof ManualRedactionEntry manualRedactionEntry) { - manualChange.withManualRedactionType(manualRedactionEntry.isAddToDictionary() ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY) + manualChange.withManualRedactionType(manualRedactionEntry.isAddToDictionary() ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD) .withChange("value", manualRedactionEntry.getValue()); } else if (baseAnnotation instanceof ManualLegalBasisChange manualLegalBasisChange) { manualChange.withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java index b7dfd989..0accaa89 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java @@ -16,6 +16,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; import com.iqser.red.service.redaction.v1.model.AnalyzeResponse; @@ -25,15 +26,12 @@ import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; import com.iqser.red.service.redaction.v1.server.service.document.EntityFromPrecursorCreationService; import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import io.micrometer.observation.annotation.Observed; -import jakarta.annotation.PostConstruct; import lombok.AccessLevel; import lombok.RequiredArgsConstructor; import lombok.experimental.FieldDefaults; @@ -51,20 +49,10 @@ public class UnprocessedChangesService { final ObservedStorageService observedStorageService; final EntityFindingUtility entityFindingUtility; final RedactionStorageService redactionStorageService; - final EntityEnrichmentService entityEnrichmentService; final EntityFromPrecursorCreationService entityFromPrecursorCreationService; final DictionaryService dictionaryService; final ManualChangesApplicationService manualChangesApplicationService; - EntityCreationService entityCreationService; - - - @PostConstruct - public void initEntityCreationService() { - - entityCreationService = new EntityCreationService(entityEnrichmentService); - } - @Observed(name = "UnprocessedChangesService", contextualName = 
"analyse-surrounding-text") public void analyseSurroundingText(AnalyzeRequest analyzeRequest) { @@ -76,11 +64,19 @@ public class UnprocessedChangesService { EntityLog previousEntityLog = redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); - Set allAnnotationIds = analyzeRequest.getManualRedactions().getEntriesToAdd().stream().map(ManualRedactionEntry::getAnnotationId).collect(Collectors.toSet()); - Set resizeIds = analyzeRequest.getManualRedactions().getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId).collect(Collectors.toSet()); + Set allAnnotationIds = analyzeRequest.getManualRedactions().getEntriesToAdd() + .stream() + .map(ManualRedactionEntry::getAnnotationId) + .collect(Collectors.toSet()); + Set resizeIds = analyzeRequest.getManualRedactions().getResizeRedactions() + .stream() + .map(ManualResizeRedaction::getAnnotationId) + .collect(Collectors.toSet()); allAnnotationIds.addAll(resizeIds); - List manualResizeRedactions = analyzeRequest.getManualRedactions().getResizeRedactions().stream().toList(); + List manualResizeRedactions = analyzeRequest.getManualRedactions().getResizeRedactions() + .stream() + .toList(); List manualEntitiesToBeResized = previousEntityLog.getEntityLogEntry() .stream() .filter(entityLogEntry -> resizeIds.contains(entityLogEntry.getId())) @@ -99,31 +95,36 @@ public class UnprocessedChangesService { notFoundManualEntities = entityFromPrecursorCreationService.toTextEntity(manualEntities, document); } - document.getEntities().forEach(textEntity -> { - Set processedIds = new HashSet<>(); - for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) { - if (processedIds.contains(positionsOnPerPage.getId())) { - continue; - } - processedIds.add(positionsOnPerPage.getId()); - List positions = 
positionsOnPerPage.getRectanglePerLine() - .stream() - .map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber())) - .collect(Collectors.toList()); - unprocessedManualEntities.add(UnprocessedManualEntity.builder() - .annotationId(allAnnotationIds.stream().filter(textEntity::matchesAnnotationId).findFirst().orElse("")) - .textBefore(textEntity.getTextBefore()) - .textAfter(textEntity.getTextAfter()) - .section(textEntity.getManualOverwrite().getSection().orElse(textEntity.getDeepestFullyContainingNode().toString())) - .positions(positions) - .build()); - } - }); + document.getEntities() + .forEach(textEntity -> { + Set processedIds = new HashSet<>(); + for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) { + if (processedIds.contains(positionsOnPerPage.getId())) { + continue; + } + processedIds.add(positionsOnPerPage.getId()); + List positions = positionsOnPerPage.getRectanglePerLine() + .stream() + .map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber())) + .collect(Collectors.toList()); + unprocessedManualEntities.add(UnprocessedManualEntity.builder() + .annotationId(allAnnotationIds.stream() + .filter(textEntity::matchesAnnotationId) + .findFirst() + .orElse("")) + .textBefore(textEntity.getTextBefore()) + .textAfter(textEntity.getTextAfter()) + .section(textEntity.getManualOverwrite().getSection() + .orElse(textEntity.getDeepestFullyContainingNode().toString())) + .positions(positions) + .build()); + } + }); notFoundManualEntities.forEach(manualEntity -> unprocessedManualEntities.add(builDefaultUnprocessedManualEntity(manualEntity))); rabbitTemplate.convertAndSend(QueueNames.REDACTION_ANALYSIS_RESPONSE_QUEUE, - AnalyzeResponse.builder().fileId(analyzeRequest.getFileId()).unprocessedManualEntities(unprocessedManualEntities).build()); + AnalyzeResponse.builder().fileId(analyzeRequest.getFileId()).unprocessedManualEntities(unprocessedManualEntities).build()); } @@ -143,13 +144,13 @@ public 
class UnprocessedChangesService { continue; } - TextEntity correctEntity = createCorrectEntity(precursorEntity, optionalTextEntity.get()); + TextEntity correctEntity = EntityFromPrecursorCreationService.createCorrectEntity(precursorEntity, optionalTextEntity.get()); Optional optionalManualResizeRedaction = manualResizeRedactions.stream() .filter(manualResizeRedaction -> manualResizeRedaction.getAnnotationId().equals(precursorEntity.getId())) .findFirst(); if (optionalManualResizeRedaction.isPresent()) { ManualResizeRedaction manualResizeRedaction = optionalManualResizeRedaction.get(); - manualChangesApplicationService.resizeEntityAndReinsert(correctEntity, manualResizeRedaction); + manualChangesApplicationService.resize(correctEntity, manualResizeRedaction); // If the entity's value is not the same as the manual resize request's value it means we didn't find it anywhere and we want to remove it // from the graph, so it does not get processed and sent back to persistence-service to update its value. 
@@ -160,60 +161,38 @@ public class UnprocessedChangesService { } // remove all temp entities from the graph - tempEntities.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph); + tempEntities.values() + .stream() + .flatMap(Collection::stream) + .forEach(TextEntity::removeFromGraph); } - private TextEntity createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity) { + private UnprocessedManualEntity builDefaultUnprocessedManualEntity(PrecursorEntity precursorEntity) { - TextEntity correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId()); - - correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode()); - correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes())); - correctEntity.setDuplicateTextRanges(new ArrayList<>(closestEntity.getDuplicateTextRanges())); - correctEntity.setPages(new HashSet<>(closestEntity.getPages())); - - correctEntity.setValue(closestEntity.getValue()); - correctEntity.setTextAfter(closestEntity.getTextAfter()); - correctEntity.setTextBefore(closestEntity.getTextBefore()); - - correctEntity.getIntersectingNodes().forEach(n -> n.getEntities().add(correctEntity)); - correctEntity.getPages().forEach(page -> page.getEntities().add(correctEntity)); - - correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); - correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); - correctEntity.setDossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()); - correctEntity.getManualOverwrite().addChanges(precursorEntity.getManualOverwrite().getManualChangeLog()); - - return correctEntity; -} + return UnprocessedManualEntity.builder() + .annotationId(precursorEntity.getId()) + .textAfter("") + .textBefore("") + .section("") + .positions(precursorEntity.getManualOverwrite().getPositions() + 
.orElse(precursorEntity.getEntityPosition()) + .stream() + .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) + .toList()) + .build(); + } -private UnprocessedManualEntity builDefaultUnprocessedManualEntity(PrecursorEntity precursorEntity) { + private List manualEntitiesConverter(ManualRedactions manualRedactions, String dossierTemplateId) { - return UnprocessedManualEntity.builder() - .annotationId(precursorEntity.getId()) - .textAfter("") - .textBefore("") - .section("") - .positions(precursorEntity.getManualOverwrite() - .getPositions() - .orElse(precursorEntity.getEntityPosition()) - .stream() - .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) - .toList()) - .build(); -} - - -private List manualEntitiesConverter(ManualRedactions manualRedactions, String dossierTemplateId) { - - return manualRedactions.getEntriesToAdd() - .stream() - .filter(manualRedactionEntry -> manualRedactionEntry.getPositions() != null && !manualRedactionEntry.getPositions().isEmpty()) - .map(manualRedactionEntry -> PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, - dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) - .toList(); -} + return manualRedactions.getEntriesToAdd() + .stream() + .filter(manualRedactionEntry -> manualRedactionEntry.getPositions() != null && !manualRedactionEntry.getPositions().isEmpty()) + .filter(BaseAnnotation::isLocal) + .map(manualRedactionEntry -> PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, + dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) + .toList(); + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java index 
76b9cfb2..77f80a0a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java @@ -47,7 +47,9 @@ public class EntityFindingUtility { } - public Optional findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity, Map> entitiesWithSameValue, double matchThreshold) { + public Optional findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity, + Map> entitiesWithSameValue, + double matchThreshold) { if (precursorEntity.getValue() == null) { return Optional.empty(); @@ -56,7 +58,7 @@ public class EntityFindingUtility { List possibleEntities = entitiesWithSameValue.get(precursorEntity.getValue().toLowerCase(Locale.ENGLISH)); if (entityIdentifierValueNotFound(possibleEntities)) { - log.warn("Entity could not be created with precursorEntity: {}, due to the value {} not being found anywhere.", precursorEntity, precursorEntity.getValue()); + log.info("Entity could not be created with precursorEntity: {}, due to the value {} not being found anywhere.", precursorEntity, precursorEntity.getValue()); return Optional.empty(); } @@ -66,18 +68,22 @@ public class EntityFindingUtility { .min(Comparator.comparingDouble(ClosestEntity::getDistance)); if (optionalClosestEntity.isEmpty()) { - log.warn("No Entity with value {} found on page {}", precursorEntity.getValue(), precursorEntity.getEntityPosition()); + log.info("No Entity with value {} found on page {}", precursorEntity.getValue(), precursorEntity.getEntityPosition()); return Optional.empty(); } ClosestEntity closestEntity = optionalClosestEntity.get(); if (closestEntity.getDistance() > matchThreshold) { - log.warn("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}", - 
precursorEntity.getValue(), - precursorEntity.getEntityPosition().get(0).pageNumber(), - precursorEntity.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList(), - closestEntity.getDistance(), - matchThreshold); + log.info("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}", + precursorEntity.getValue(), + precursorEntity.getEntityPosition() + .get(0).pageNumber(), + precursorEntity.getEntityPosition() + .stream() + .map(RectangleWithPage::rectangle2D) + .toList(), + closestEntity.getDistance(), + matchThreshold); return Optional.empty(); } @@ -93,8 +99,14 @@ public class EntityFindingUtility { private static boolean pagesMatch(TextEntity entity, List originalPositions) { - Set entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet()); - Set originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet()); + Set entityPageNumbers = entity.getPositionsOnPagePerPage() + .stream() + .map(PositionOnPage::getPage) + .map(Page::getNumber) + .collect(Collectors.toSet()); + Set originalPageNumbers = originalPositions.stream() + .map(RectangleWithPage::pageNumber) + .collect(Collectors.toSet()); return entityPageNumbers.containsAll(originalPageNumbers); } @@ -105,15 +117,16 @@ public class EntityFindingUtility { return Double.MAX_VALUE; } return originalPositions.stream() - .mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())) - .average() + .mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())).average() .orElse(Double.MAX_VALUE); } private static long countRectangles(TextEntity entity) { - return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> 
redactionPosition.getRectanglePerLine().size()).sum(); + return entity.getPositionsOnPagePerPage() + .stream() + .mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum(); } @@ -161,7 +174,8 @@ public class EntityFindingUtility { pageNumbers.stream().filter(pageNumber -> !node.onPage(pageNumber)).toList(), node.getPages())); } - SearchImplementation searchImplementation = new SearchImplementation(entryValues, true); + + SearchImplementation searchImplementation = new SearchImplementation(entryValues.stream().map(String::trim).collect(Collectors.toSet()), true); return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) .stream() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java index faddf9bd..7607a990 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java @@ -9,7 +9,6 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; @@ -23,46 +22,34 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import lombok.AccessLevel; +import lombok.RequiredArgsConstructor; import 
lombok.experimental.FieldDefaults; import lombok.extern.slf4j.Slf4j; @Slf4j @Service +@RequiredArgsConstructor @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public class EntityFromPrecursorCreationService { static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities EntityFindingUtility entityFindingUtility; - EntityCreationService entityCreationService; DictionaryService dictionaryService; - @Autowired - public EntityFromPrecursorCreationService(EntityEnrichmentService entityEnrichmentService, DictionaryService dictionaryService, EntityFindingUtility entityFindingUtility) { - - this.entityFindingUtility = entityFindingUtility; - entityCreationService = new EntityCreationService(entityEnrichmentService); - this.dictionaryService = dictionaryService; - } - - public List createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions manualRedactions, SemanticNode node, String dossierTemplateId) { Set idRemovals = manualRedactions.getIdsToRemove(); List manualEntities = manualRedactions.getEntriesToAdd() .stream() - .filter(manualRedactionEntry -> !(idRemovals.stream() - .map(BaseAnnotation::getAnnotationId) - .toList() - .contains(manualRedactionEntry.getAnnotationId()) && manualRedactionEntry.getRequestDate() - .isBefore(idRemovals.stream() - .filter(idRemoval -> idRemoval.getAnnotationId().equals(manualRedactionEntry.getAnnotationId())) - .findFirst() - .get() - .getRequestDate()))) - .filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary())) - .map(manualRedactionEntry -> PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, - dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) + .filter(BaseAnnotation::isLocal) + .filter(manualRedactionEntry -> idRemovals.stream() + .filter(idRemoval -> 
idRemoval.getAnnotationId().equals(manualRedactionEntry.getAnnotationId())) + .filter(idRemoval -> idRemoval.getRequestDate().isBefore(manualRedactionEntry.getRequestDate())) + .findAny()// + .isEmpty()) + .map(manualRedactionEntry -> // + PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) .peek(manualEntity -> { if (manualEntity.getEntityType().equals(EntityType.HINT)) { manualEntity.skip("MAN.5.1", "manual hint is skipped by default"); @@ -71,7 +58,6 @@ public class EntityFromPrecursorCreationService { } }) .toList(); - return toTextEntity(manualEntities, node); } @@ -90,8 +76,14 @@ public class EntityFromPrecursorCreationService { public List toTextEntity(List precursorEntities, SemanticNode node) { - var notFoundEntities = precursorEntities.stream().filter(PrecursorEntity::isRectangle).collect(Collectors.toList()); - var findableEntities = precursorEntities.stream().filter(precursorEntity -> !precursorEntity.isRectangle()).toList(); + var notFoundEntities = precursorEntities.stream() + .filter(PrecursorEntity::isRectangle) + .collect(Collectors.toList()); + + var findableEntities = precursorEntities.stream() + .filter(precursorEntity -> !precursorEntity.isRectangle()) + .toList(); + Map> tempEntitiesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(node, findableEntities); for (PrecursorEntity precursorEntity : findableEntities) { @@ -102,7 +94,12 @@ public class EntityFromPrecursorCreationService { } createCorrectEntity(precursorEntity, optionalClosestEntity.get()); } - tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph); + + tempEntitiesByValue.values() + .stream() + .flatMap(Collection::stream) + .forEach(TextEntity::removeFromGraph); + return notFoundEntities; } @@ -113,9 +110,23 @@ public class EntityFromPrecursorCreationService { * @param precursorEntity The entity identifier for the 
RedactionEntity. * @param closestEntity The closest Boundary to the RedactionEntity. */ - private void createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity) { + public static TextEntity createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity) { - TextEntity correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId()); + return createCorrectEntity(precursorEntity, closestEntity, false); + } + + + public static TextEntity createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity, boolean generateId) { + + TextEntity correctEntity; + if (generateId) { + correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), + precursorEntity.type(), + precursorEntity.getEntityType(), + closestEntity.getDeepestFullyContainingNode()); + } else { + correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId()); + } correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode()); correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes())); correctEntity.setDuplicateTextRanges(new ArrayList<>(closestEntity.getDuplicateTextRanges())); @@ -125,14 +136,17 @@ public class EntityFromPrecursorCreationService { correctEntity.setTextAfter(closestEntity.getTextAfter()); correctEntity.setTextBefore(closestEntity.getTextBefore()); - correctEntity.getIntersectingNodes().forEach(n -> n.getEntities().add(correctEntity)); - correctEntity.getPages().forEach(page -> page.getEntities().add(correctEntity)); + correctEntity.getIntersectingNodes() + .forEach(n -> n.getEntities().add(correctEntity)); + correctEntity.getPages() + .forEach(page -> page.getEntities().add(correctEntity)); correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); 
correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); correctEntity.setDossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()); correctEntity.getManualOverwrite().addChanges(precursorEntity.getManualOverwrite().getManualChangeLog()); correctEntity.addEngines(precursorEntity.getEngines()); + return correctEntity; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java index 88268f3a..9fb33529 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java @@ -16,6 +16,7 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; @@ -55,7 +56,14 @@ public class EntityDroolsExecutionService { ManualRedactions manualRedactions, NerEntities nerEntities) { - return executeRules(kieContainer, document, document.streamChildren().toList(), dictionary, fileAttributes, manualRedactions, nerEntities); + return executeRules(kieContainer, + document, 
+ document.streamChildren() + .toList(), + dictionary, + fileAttributes, + manualRedactions, + nerEntities); } @@ -80,19 +88,28 @@ public class EntityDroolsExecutionService { kieSession.setGlobal("dictionary", dictionary); kieSession.insert(document); - document.getEntities().forEach(kieSession::insert); + + document.getEntities() + .forEach(kieSession::insert); + sectionsToAnalyze.forEach(kieSession::insert); - sectionsToAnalyze.stream().flatMap(SemanticNode::streamAllSubNodes).forEach(kieSession::insert); - document.getPages().forEach(kieSession::insert); - fileAttributes.stream().filter(f -> f.getValue() != null).forEach(kieSession::insert); + + sectionsToAnalyze.stream() + .flatMap(SemanticNode::streamAllSubNodes) + .forEach(kieSession::insert); + + document.getPages() + .forEach(kieSession::insert); + + fileAttributes.stream() + .filter(f -> f.getValue() != null) + .forEach(kieSession::insert); if (manualRedactions != null) { - manualRedactions.getResizeRedactions().forEach(kieSession::insert); - manualRedactions.getRecategorizations().forEach(kieSession::insert); - manualRedactions.getEntriesToAdd().forEach(kieSession::insert); - manualRedactions.getForceRedactions().forEach(kieSession::insert); - manualRedactions.getIdsToRemove().forEach(kieSession::insert); - manualRedactions.getLegalBasisChanges().forEach(kieSession::insert); + manualRedactions.buildAll() + .stream() + .filter(BaseAnnotation::isLocal) + .forEach(kieSession::insert); } kieSession.insert(nerEntities); @@ -105,7 +122,8 @@ public class EntityDroolsExecutionService { }); try { - completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS).get(); + completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS) + .get(); } catch (ExecutionException e) { kieSession.dispose(); if (e.getCause() instanceof TimeoutException) { diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java index 295b7e18..38656b66 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.utils; +import java.util.Collections; import java.util.LinkedList; import java.util.Set; import java.util.function.BiConsumer; @@ -17,7 +18,7 @@ public class MigratedIdsCollector implements Collector supplier() { - return () -> new MigratedIds(new LinkedList<>()); + return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java index db60cd34..e3b704cc 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java @@ -31,6 +31,7 @@ import org.springframework.test.context.junit.jupiter.SpringExtension; import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; +import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; @@ -49,7 +50,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; -import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations; import com.knecon.fforesight.tenantcommons.TenantContext; import lombok.SneakyThrows; @@ -107,7 +107,7 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { @SneakyThrows public void testSave() { - MigratedIds ids = new MigratedIds(new LinkedList<>()); + MigratedIds ids = new MigratedIds(new LinkedList<>(), null); ids.addMapping("123", "321"); ids.addMapping("123", "321"); ids.addMapping("123", "321"); @@ -173,7 +173,11 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { mergedRedactionLog = redactionLog; } - MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(mergedRedactionLog, document, TEST_DOSSIER_TEMPLATE_ID, manualRedactions); + MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(mergedRedactionLog, + document, + TEST_DOSSIER_TEMPLATE_ID, + manualRedactions, + TEST_FILE_ID); redactionStorageService.storeObject(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ENTITY_LOG, migratedEntityLog.getEntityLog()); assertEquals(mergedRedactionLog.getRedactionLogEntry().size(), migratedEntityLog.getEntityLog().getEntityLogEntry().size()); @@ 
-187,10 +191,11 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { assertEquals(mergedRedactionLog.getLegalBasis().size(), entityLog.getLegalBasis().size()); Map migratedIds = migratedEntityLog.getMigratedIds().buildOldToNewMapping(); +// assertEquals(legacyRedactionLogMergeService.getNumberOfAffectedAnnotations(manualRedactions), migratedIds.size()); + migratedIds.forEach((oldId, newId) -> assertEntryIsEqual(oldId, newId, mergedRedactionLog, entityLog, migratedIds)); - AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID) - .build()); + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); File outputFile = Path.of(OsUtils.getTemporaryDirectory()).resolve(Path.of(fileName.replaceAll(".pdf", "_MIGRATED.pdf")).getFileName()).toFile(); try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) { @@ -268,13 +273,24 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { if (!redactionLogEntry.isImage()) { assertEquals(redactionLogEntry.getValue().toLowerCase(Locale.ENGLISH), entityLogEntry.getValue().toLowerCase(Locale.ENGLISH)); } + if (entityLogEntry.getManualChanges() + .stream() + .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.RECATEGORIZE))) { + assertEquals(redactionLogEntry.getType(), entityLogEntry.getType()); + } assertEquals(redactionLogEntry.getChanges().size(), entityLogEntry.getChanges().size()); assertTrue(redactionLogEntry.getManualChanges().size() <= entityLogEntry.getManualChanges().size()); assertEquals(redactionLogEntry.getPositions().size(), entityLogEntry.getPositions().size()); - assertTrue(positionsAlmostEqual(redactionLogEntry.getPositions(), entityLogEntry.getPositions())); -// assertEquals(redactionLogEntry.getColor(), entityLogEntry.getColor()); - 
assertEqualsNullSafe(redactionLogEntry.getLegalBasis(), entityLogEntry.getLegalBasis()); -// assertEqualsNullSafe(redactionLogEntry.getReason(), entityLogEntry.getReason()); + if (entityLogEntry.getManualChanges() + .stream() + .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.RESIZE) || mc.getManualRedactionType().equals(ManualRedactionType.RESIZE_IN_DICTIONARY))) { + assertTrue(positionsAlmostEqual(redactionLogEntry.getPositions(), entityLogEntry.getPositions())); + } + if (entityLogEntry.getManualChanges() + .stream() + .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.FORCE))) { + assertEqualsNullSafe(redactionLogEntry.getLegalBasis(), entityLogEntry.getLegalBasis()); + } assertReferencesEqual(redactionLogEntry.getReference(), entityLogEntry.getReference(), oldToNewMapping); assertEquals(redactionLogEntry.isDictionaryEntry(), entityLogEntry.isDictionaryEntry()); assertEquals(redactionLogEntry.isDossierDictionaryEntry(), entityLogEntry.isDossierDictionaryEntry()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java index 3f7726bc..90c9bfa1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -87,15 +87,15 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) - 
.type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); @@ -122,6 +122,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { assertThat(recommendations).containsExactlyInAnyOrder("Michael N.", "Funnarie B.", "Feuer A."); } + @Test public void acceptanceTests() throws IOException { @@ -133,8 +134,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { System.out.println("Finished analysis"); EntityLog entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst().orElseThrow(); - var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst().orElseThrow(); + var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst() + .orElseThrow(); + var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst() + .orElseThrow(); 
assertEquals(EntryState.SKIPPED, asyaLyon1.getState()); @@ -146,8 +149,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst().orElseThrow(); - var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst().orElseThrow(); + var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst() + .orElseThrow(); + var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst() + .orElseThrow(); assertEquals(EntryState.APPLIED, asyaLyon2.getState()); @@ -168,13 +173,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { .stream() .filter(entry -> entry.getType().equals(type)) .filter(entry -> entry.getValue().equals(value)) - .filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0))); + .filter(entry -> entry.getContainingNodeId() + .get(0).equals(sectionNumber.get(0))); } private static Stream findEntityByTypeAndValue(EntityLog redactionLog, String type, String value) { - return redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getType().equals(type)).filter(entry -> entry.getValue().equals(value)); + return redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getType().equals(type)) + .filter(entry -> entry.getValue().equals(value)); } @@ -201,13 +210,15 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { var redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); assertEquals(EntryState.IGNORED, - findEntityByTypeAndValue(redactionLog2, 
"CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY)).findFirst().get().getState()); + findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY)) + .findFirst() + .get().getState()); } private static IdRemoval buildIdRemoval(String id) { - return IdRemoval.builder().annotationId(id).requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build(); + return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build(); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index e10a911b..da2a57ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -119,15 +119,15 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + 
.type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); @@ -169,9 +169,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - entityLog.getEntityLogEntry().forEach(entry -> { - duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); - }); + entityLog.getEntityLogEntry() + .forEach(entry -> { + duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); + }); duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1)); @@ -216,12 +217,14 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = ManualRedactions.builder() .resizeRedactions(Set.of(ManualResizeRedaction.builder() - .annotationId("c6be5277f5ee60dc3d83527798b7fe02") - .value("Dr. Alan") - .positions(List.of(new Rectangle(236.8f, 182.90005f, 40.584f, 12.642f, 7))) - .requestDate(OffsetDateTime.now()) - .updateDictionary(false) - .build())) + .annotationId("c6be5277f5ee60dc3d83527798b7fe02") + .fileId(TEST_FILE_ID) + .value("Dr. 
Alan") + .positions(List.of(new Rectangle(236.8f, 182.90005f, 40.584f, 12.642f, 7))) + .requestDate(OffsetDateTime.now()) + .updateDictionary(false) + .user("user") + .build())) .build(); request.setManualRedactions(manualRedactions); @@ -256,7 +259,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var values = redactionLog.getEntityLogEntry().stream().map(EntityLogEntry::getValue).collect(Collectors.toList()); + var values = redactionLog.getEntityLogEntry() + .stream() + .map(EntityLogEntry::getValue) + .collect(Collectors.toList()); assertThat(values).containsExactlyInAnyOrder("Lastname M.", "Doe", "Doe J.", "M. Mustermann", "Mustermann M.", "F. Lastname"); } @@ -268,8 +274,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource importedRedactionClasspathResource = new ClassPathResource( "files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), - importedRedactionClasspathResource.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), + importedRedactionClasspathResource.getInputStream()); AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf"); System.out.println("Start Full integration test"); @@ -353,10 +359,18 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var mergedEntityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var cbiAddressBeforeHintRemoval = entityLog.getEntityLogEntry().stream().filter(re -> 
re.getType().equalsIgnoreCase("CBI_Address")).findAny().get(); + var cbiAddressBeforeHintRemoval = entityLog.getEntityLogEntry() + .stream() + .filter(re -> re.getType().equalsIgnoreCase("CBI_Address")) + .findAny() + .get(); assertThat(cbiAddressBeforeHintRemoval.getState().equals(EntryState.APPLIED)).isFalse(); - var cbiAddressAfterHintRemoval = mergedEntityLog.getEntityLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get(); + var cbiAddressAfterHintRemoval = mergedEntityLog.getEntityLogEntry() + .stream() + .filter(re -> re.getType().equalsIgnoreCase("CBI_Address")) + .findAny() + .get(); assertThat(cbiAddressAfterHintRemoval.getState().equals(EntryState.APPLIED)).isTrue(); } @@ -386,9 +400,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - entityLog.getEntityLogEntry().forEach(entry -> { - duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); - }); + entityLog.getEntityLogEntry() + .forEach(entry -> { + duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); + }); duplicates.forEach((id, redactionLogEntries) -> assertThat(redactionLogEntries.size()).isEqualTo(1)); @@ -421,11 +436,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { AnalyzeRequest request = uploadFileToStorage(fileName); request.setFileAttributes(List.of(FileAttribute.builder() - .id("fileAttributeId") - .label("Vertebrate Study") - .placeholder("{fileattributes.vertebrateStudy}") - .value("true") - .build())); + .id("fileAttributeId") + .label("Vertebrate Study") + .placeholder("{fileattributes.vertebrateStudy}") + .value("true") + .build())); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); AnalyzeResult result = analyzeService.analyze(request); @@ -449,7 +464,10 @@ public class RedactionIntegrationTest extends 
AbstractRedactionIntegrationTest { correctFound++; continue loop; } - if (Objects.equals(entityLogEntry.getContainingNodeId().get(0), section.getTreeId().get(0))) { + if (Objects.equals(entityLogEntry.getContainingNodeId() + .get(0), + section.getTreeId() + .get(0))) { String value = section.getTextBlock().subSequence(new TextRange(entityLogEntry.getStartOffset(), entityLogEntry.getEndOffset())).toString(); if (entityLogEntry.getValue().equalsIgnoreCase(value)) { correctFound++; @@ -481,12 +499,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = new ManualRedactions(); manualRedactions.setEntriesToAdd(Set.of(ManualRedactionEntry.builder() - .value("Redact") - .addToDictionary(true) - .addToDossierDictionary(true) - .positions(List.of(new Rectangle(new Point(95.96979999999999f, 515.7984f), 19.866899999999987f, 46.953f, 2))) - .type("dossier_redaction") - .build())); + .value("Redact") + .addToDictionary(true) + .addToDossierDictionary(true) + .positions(List.of(new Rectangle(new Point(95.96979999999999f, 515.7984f), 19.866899999999987f, 46.953f, 2))) + .type("dossier_redaction") + .build())); request.setManualRedactions(manualRedactions); @@ -548,7 +566,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var changes = entityLog.getEntityLogEntry().stream().filter(entry -> entry.getValue() != null && entry.getValue().equals("report")).findFirst().get().getChanges(); + var changes = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue() != null && entry.getValue().equals("report")) + .findFirst() + .get().getChanges(); assertThat(changes.size()).isEqualTo(2); @@ -568,18 +590,18 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource responseJson = new 
ClassPathResource("files/crafted_document.NER_ENTITIES.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), - responseJson.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), + responseJson.getInputStream()); long start = System.currentTimeMillis(); AnalyzeRequest request = uploadFileToStorage(fileName); request.setFileAttributes(List.of(FileAttribute.builder() - .id("fileAttributeId") - .label("Vertebrate Study") - .placeholder("{fileattributes.vertebrateStudy}") - .value("true") - .build())); + .id("fileAttributeId") + .label("Vertebrate Study") + .placeholder("{fileattributes.vertebrateStudy}") + .value("true") + .build())); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); AnalyzeResult result = analyzeService.analyze(request); @@ -601,7 +623,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .map(redactionLogEntry -> new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())) .map(boundary -> documentGraph.getTextBlock().subSequence(boundary).toString()) .toList(); - List valuesInRedactionLog = entityLog.getEntityLogEntry().stream().filter(e -> !e.getEntryType().equals(EntryType.IMAGE)).map(EntityLogEntry::getValue).toList(); + List valuesInRedactionLog = entityLog.getEntityLogEntry() + .stream() + .filter(e -> !e.getEntryType().equals(EntryType.IMAGE)) + .map(EntityLogEntry::getValue) + .toList(); assertEquals(valuesInRedactionLog, valuesInDocument); @@ -628,11 +654,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = new ManualRedactions(); manualRedactions.setRecategorizations(Set.of(ManualRecategorization.builder() - .annotationId("37eee3e9d589a5cc529bfec38c3ba479") - .fileId("fileId") - .type("signature") - 
.requestDate(OffsetDateTime.now()) - .build())); + .annotationId("37eee3e9d589a5cc529bfec38c3ba479") + .fileId("fileId") + .type("signature") + .requestDate(OffsetDateTime.now()) + .user("user") + .build())); request.setManualRedactions(manualRedactions); @@ -683,40 +710,43 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = new ManualRedactions(); manualRedactions.getIdsToRemove() .add(IdRemoval.builder() - .annotationId("308dab9015bfafd911568cffe0a7f7de") - .fileId(TEST_FILE_ID) - .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 475479, ZoneOffset.UTC)) - .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 483651, ZoneOffset.UTC)) - .build()); + .annotationId("308dab9015bfafd911568cffe0a7f7de") + .fileId(TEST_FILE_ID) + .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 475479, ZoneOffset.UTC)) + .user("user") + .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 483651, ZoneOffset.UTC)) + .build()); manualRedactions.getForceRedactions() .add(ManualForceRedaction.builder() - .annotationId("0b56ea1a87c83f351df177315af94f0d") - .fileId(TEST_FILE_ID) - .legalBasis("Something") - .requestDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 4653, ZoneOffset.UTC)) - .processedDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 794, ZoneOffset.UTC)) - .build()); + .annotationId("0b56ea1a87c83f351df177315af94f0d") + .fileId(TEST_FILE_ID) + .legalBasis("Something") + .user("user") + .requestDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 4653, ZoneOffset.UTC)) + .processedDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 794, ZoneOffset.UTC)) + .build()); manualRedactions.getIdsToRemove() .add(IdRemoval.builder() - .annotationId("0b56ea1a87c83f351df177315af94f0d") - .fileId(TEST_FILE_ID) - .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 961721, ZoneOffset.UTC)) - .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 96528, ZoneOffset.UTC)) - .build()); + 
.annotationId("0b56ea1a87c83f351df177315af94f0d") + .fileId(TEST_FILE_ID) + .user("user") + .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 961721, ZoneOffset.UTC)) + .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 96528, ZoneOffset.UTC)) + .build()); request.setManualRedactions(manualRedactions); AnalyzeResult result = analyzeService.analyze(request); AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder() - .manualRedactions(manualRedactions) - .colors(colors) - .types(types) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .build()); + .manualRedactions(manualRedactions) + .colors(colors) + .types(types) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .build()); try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); @@ -921,6 +951,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .textBefore("") .updateDictionary(false) .textAfter("") + .user("user") .build(); manualRedactions.getResizeRedactions().add(manualResizeRedaction); @@ -932,12 +963,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder() - .manualRedactions(manualRedactions) - .colors(colors) - .types(types) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .build()); + .manualRedactions(manualRedactions) + .colors(colors) + .types(types) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .build()); try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); @@ -960,15 +991,16 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var 
redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - redactionLog.getEntityLogEntry().forEach(entry -> { - if (!entry.getEntryType().equals(EntryType.HINT)) { - if (entry.getType().equals("CBI_author")) { - assertThat(entry.getReason()).isEqualTo("Not redacted because it's row does not belong to a vertebrate study"); - } else if (entry.getType().equals("CBI_address")) { - assertThat(entry.getReason()).isEqualTo("No vertebrate found"); - } - } - }); + redactionLog.getEntityLogEntry() + .forEach(entry -> { + if (!entry.getEntryType().equals(EntryType.HINT)) { + if (entry.getType().equals("CBI_author")) { + assertThat(entry.getReason()).isEqualTo("Not redacted because it's row does not belong to a vertebrate study"); + } else if (entry.getType().equals("CBI_address")) { + assertThat(entry.getReason()).isEqualTo("No vertebrate found"); + } + } + }); } @@ -1005,18 +1037,20 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { String manualAddId = UUID.randomUUID().toString(); manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder() - .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") - .fileId("fileId") - .processedDate(OffsetDateTime.now()) - .requestDate(OffsetDateTime.now()) - .build())); + .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") + .fileId("fileId") + .user("user") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .build())); manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Something") - .requestDate(OffsetDateTime.now()) - .processedDate(OffsetDateTime.now()) - .build())); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .user("user") + .legalBasis("Something") + .requestDate(OffsetDateTime.now()) + .processedDate(OffsetDateTime.now()) + .build())); ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); 
manualRedactionEntry.setAnnotationId(manualAddId); @@ -1027,7 +1061,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { manualRedactionEntry.setProcessedDate(OffsetDateTime.now()); manualRedactionEntry.setRequestDate(OffsetDateTime.now()); manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(), - Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); + Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); // manualRedactions.getEntriesToAdd().add(manualRedactionEntry); @@ -1038,39 +1072,63 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { manualRedactions.getEntriesToAdd().add(manualRedactionEntry); manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder() - .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") - .fileId("fileId") - .requestDate(OffsetDateTime.now()) - .processedDate(OffsetDateTime.now()) - .build())); + .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") + .fileId("fileId") + .requestDate(OffsetDateTime.now()) + .processedDate(OffsetDateTime.now()) + .build())); manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Manual Legal Basis Change") - .processedDate(OffsetDateTime.now()) - .requestDate(OffsetDateTime.now()) - .build()))); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .legalBasis("Manual Legal Basis Change") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .build()))); manualRedactions.setResizeRedactions(Set.of(ManualResizeRedaction.builder() - .annotationId("fc287b74be2421156ab2895c7474ccdd") - .fileId("fileId") - .processedDate(OffsetDateTime.now()) - .requestDate(OffsetDateTime.now()) - 
.value("Syngenta Crop Protection AG, Basel, Switzerland RCC Ltd., Itingen, Switzerland") - .positions(List.of(Rectangle.builder().topLeftX(289.44595f).topLeftY(327.567f).width(7.648041f).height(82.51475f).page(1).build(), - Rectangle.builder().topLeftX(298.67056f).topLeftY(327.567f).width(7.648041f).height(75.32377f).page(1).build(), - Rectangle.builder().topLeftX(307.89517f).topLeftY(327.567f).width(7.648041f).height(61.670967f).page(1).build(), - Rectangle.builder().topLeftX(316.99985f).topLeftY(327.567f).width(7.648041f).height(38.104286f).page(1).build())) - .updateDictionary(false) - .build())); + .annotationId("fc287b74be2421156ab2895c7474ccdd") + .fileId("fileId") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .value("Syngenta Crop Protection AG, Basel, Switzerland RCC Ltd., Itingen, Switzerland") + .positions(List.of(Rectangle.builder() + .topLeftX(289.44595f) + .topLeftY(327.567f) + .width(7.648041f) + .height(82.51475f) + .page(1) + .build(), + Rectangle.builder() + .topLeftX(298.67056f) + .topLeftY(327.567f) + .width(7.648041f) + .height(75.32377f) + .page(1) + .build(), + Rectangle.builder() + .topLeftX(307.89517f) + .topLeftY(327.567f) + .width(7.648041f) + .height(61.670967f) + .page(1) + .build(), + Rectangle.builder() + .topLeftX(316.99985f) + .topLeftY(327.567f) + .width(7.648041f) + .height(38.104286f) + .page(1) + .build())) + .updateDictionary(false) + .build())); analyzeService.reanalyze(request); AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder() - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .manualRedactions(manualRedactions) - .build()); + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .manualRedactions(manualRedactions) + .build()); try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) { 
fileOutputStream.write(annotateResponse.getDocument()); @@ -1110,8 +1168,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), - importedRedactions.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), + importedRedactions.getInputStream()); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); AnalyzeResult result = analyzeService.analyze(request); @@ -1124,17 +1182,18 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { fileOutputStream.write(annotateResponse.getDocument()); } - entityLog.getEntityLogEntry().forEach(entry -> { - if (entry.getValue() == null) { - return; - } - if (entry.getValue().equals("David")) { - assertThat(entry.getImportedRedactionIntersections()).hasSize(1); - } - if (entry.getValue().equals("annotation")) { - assertThat(entry.getImportedRedactionIntersections()).isEmpty(); - } - }); + entityLog.getEntityLogEntry() + .forEach(entry -> { + if (entry.getValue() == null) { + return; + } + if (entry.getValue().equals("David")) { + assertThat(entry.getImportedRedactionIntersections()).hasSize(1); + } + if (entry.getValue().equals("annotation")) { + assertThat(entry.getImportedRedactionIntersections()).isEmpty(); + } + }); } @@ -1163,7 +1222,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { } var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var values = entityLog.getEntityLogEntry().stream().map(EntityLogEntry::getValue).collect(Collectors.toList()); + var values = entityLog.getEntityLogEntry() + .stream() + 
.map(EntityLogEntry::getValue) + .collect(Collectors.toList()); assertThat(values).contains("Mrs. Robinson"); assertThat(values).contains("Mr. Bojangles"); @@ -1178,8 +1240,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource imageServiceResponseFileResource = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 (1).IMAGE_INFO.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), - imageServiceResponseFileResource.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), + imageServiceResponseFileResource.getInputStream()); System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); @@ -1188,23 +1250,27 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { System.out.println("Finished analysis"); request.setManualRedactions(ManualRedactions.builder() - .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() - .annotationId("3029651d0842a625f2d23f8375c23600") - .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") - .value("0049 331 441 551 14") - .requestDate(OffsetDateTime.now()) - .fileId(TEST_FILE_ID) - .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") - .build())) - .build()); + .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() + .annotationId("3029651d0842a625f2d23f8375c23600") + .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") + .value("0049 331 441 551 14") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") + .user("user") + .build())) + .build()); analyzeService.reanalyze(request); System.out.println("Finished reanalysis"); var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, 
TEST_FILE_ID); - entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getType().equals("signature")).forEach(entityLogEntry -> { - assertThat(entityLogEntry.getState() == EntryState.APPLIED).isTrue(); - }); + entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getType().equals("signature")) + .forEach(entityLogEntry -> { + assertThat(entityLogEntry.getState() == EntryState.APPLIED).isTrue(); + }); } @@ -1215,8 +1281,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource imageServiceResponseFileResource = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 (1).IMAGE_INFO.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), - imageServiceResponseFileResource.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), + imageServiceResponseFileResource.getInputStream()); System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); @@ -1225,21 +1291,23 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { System.out.println("Finished analysis"); request.setManualRedactions(ManualRedactions.builder() - .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() - .annotationId("3029651d0842a625f2d23f8375c23600") - .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") - .value("0049 331 441 551 14") - .requestDate(OffsetDateTime.now()) - .fileId(TEST_FILE_ID) - .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") - .build())) - .recategorizations(Set.of(ManualRecategorization.builder() - .annotationId("3029651d0842a625f2d23f8375c23600") - .type("CBI_author") - .requestDate(OffsetDateTime.now()) - .fileId(TEST_FILE_ID) - .build())) - .build()); + 
.legalBasisChanges(Set.of(ManualLegalBasisChange.builder() + .annotationId("3029651d0842a625f2d23f8375c23600") + .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") + .value("0049 331 441 551 14") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") + .user("user") + .build())) + .recategorizations(Set.of(ManualRecategorization.builder() + .annotationId("3029651d0842a625f2d23f8375c23600") + .type("CBI_author") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .user("user") + .build())) + .build()); analyzeService.reanalyze(request); System.out.println("Finished reanalysis"); @@ -1266,11 +1334,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { String manualAddId2 = UUID.randomUUID().toString(); List positions = List.of(Rectangle.builder().topLeftX(305.35f).topLeftY(332.5033f).width(71.40744f).height(13.645125f).page(1).build()); ManualRedactionEntry manualRedactionEntry = getManualRedactionEntry(manualAddId, - positions, - "the manufacturing or production process, including the method and innovative aspects thereof, as well as other technical and industrial specifications inherent to that process or method, except for information which is relevant to the assessment of safety"); + positions, + "the manufacturing or production process, including the method and innovative aspects thereof, as well as other technical and industrial specifications inherent to that process or method, except for information which is relevant to the assessment of safety"); ManualRedactionEntry manualRedactionEntry2 = getManualRedactionEntry(manualAddId2, - positions, - "commercial information revealing sourcing, market shares or business strategy of the applicant"); + positions, + "commercial information revealing sourcing, market shares or business strategy of the applicant"); IdRemoval idRemoval = getIdRemoval(manualAddId); IdRemoval idRemoval2 = 
getIdRemoval(manualAddId2); @@ -1282,55 +1350,101 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.APPLIED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.APPLIED); request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).idsToRemove(Set.of(idRemoval)).build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.REMOVED); request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)).idsToRemove(Set.of(idRemoval)).build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - 
assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.REMOVED); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)).findFirst().get().getState(), EntryState.APPLIED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)) + .findFirst() + .get().getState(), EntryState.APPLIED); request.setManualRedactions(ManualRedactions.builder() - .entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)) - .idsToRemove(Set.of(idRemoval, idRemoval2)) - .build()); + .entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)) + .idsToRemove(Set.of(idRemoval, idRemoval2)) + .build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.REMOVED); - 
assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)).findFirst().get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)) + .findFirst() + .get().getState(), EntryState.REMOVED); manualRedactionEntry.setRequestDate(OffsetDateTime.now()); - request.setManualRedactions(ManualRedactions.builder() - .entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)) - .idsToRemove(Set.of(idRemoval, idRemoval2)) - .build()); + request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)).idsToRemove(Set.of(idRemoval2)).build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.APPLIED); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> 
entityLogEntry.getId().equals(manualAddId2)).findFirst().get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.APPLIED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)) + .findFirst() + .get().getState(), EntryState.REMOVED); } + @Test @SneakyThrows public void testResizeWithUpdateDictionaryTrue() { @@ -1342,23 +1456,40 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { analyzeService.analyze(request); var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var david = entityLog.getEntityLogEntry().stream().filter(e -> e.getValue().equals("David")).findFirst().get(); + var david = entityLog.getEntityLogEntry() + .stream() + .filter(e -> e.getValue().equals("David")) + .findFirst() + .get(); request.setManualRedactions(ManualRedactions.builder() - .resizeRedactions(Set.of(ManualResizeRedaction.builder() - .updateDictionary(true) - .annotationId(david.getId()) - .requestDate(OffsetDateTime.now()) - .value("David Ksenia") - .positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(293.564f).width(65.592f).height(15.408f).page(1).build())) - .addToAllDossiers(false) - .build())) - .build()); + .resizeRedactions(Set.of(ManualResizeRedaction.builder() + .updateDictionary(true) + .annotationId(david.getId()) + .fileId(TEST_FILE_ID) + .user("user") + .requestDate(OffsetDateTime.now()) + .value("David Ksenia") + .positions(List.of(Rectangle.builder() + .topLeftX(56.8f) + .topLeftY(293.564f) + .width(65.592f) 
+ .height(15.408f) + .page(1) + .build())) + .addToAllDossiers(false) + .build())) + .build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var resizedEntity = entityLog.getEntityLogEntry().stream().filter(e -> e.getId().equals(david.getId())).findFirst().get(); + var resizedEntity = entityLog.getEntityLogEntry() + .stream() + .filter(e -> e.getId().equals(david.getId())) + .findFirst() + .get(); assertEquals(resizedEntity.getState(), EntryState.APPLIED); - assertEquals(resizedEntity.getValue(), "David Ksenia"); + assertEquals(resizedEntity.getValue(), "David"); + assertEquals(0, resizedEntity.getManualChanges().size()); } @@ -1367,8 +1498,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { return IdRemoval.builder() .annotationId(id) .removeFromAllDossiers(false) + .fileId(TEST_FILE_ID) + .user("user") .removeFromDictionary(false) - .requestDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java new file mode 100644 index 00000000..ac33da46 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java @@ -0,0 +1,160 @@ +package com.iqser.red.service.redaction.v1.server.document.graph; + +import static com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility.ENTITY_LAYER; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.awt.Color; +import java.io.File; +import java.nio.file.Path; +import java.util.List; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility; +import com.knecon.fforesight.service.viewerdoc.model.Visualizations; +import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService; +import com.knecon.fforesight.tenantcommons.TenantContext; + +import lombok.SneakyThrows; + +public class TableTest extends BuildDocumentIntegrationTest { + + private static final boolean DRAW_FILE = false; + + @Autowired + private EntityEnrichmentService entityEnrichmentService; + + private EntityCreationService entityCreationService; + + private static final String TYPE_1 = "type1"; + private static final String TYPE_2 = "type2"; + private static final String TYPE_3 = "type3"; + private static final String TYPE_4 = "type4"; + + private Table table; + + private Set entities; + + + @SneakyThrows + @BeforeEach + public void createTable() { + + entityCreationService = new EntityCreationService(entityEnrichmentService); + + String fileName = "files/Minimal Examples/BasicTable.pdf"; + + Document document = buildGraph(fileName); + + table = 
(Table) document.streamAllSubNodesOfType(NodeType.TABLE) + .findAny() + .orElseThrow(); + + entities = List.of(// + entityCreationService.byString("Cell11", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell21", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell31", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell41", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell51", TYPE_1, EntityType.ENTITY, document), + + entityCreationService.byString("Cell12", TYPE_2, EntityType.ENTITY, document), + entityCreationService.byString("Cell32", TYPE_2, EntityType.ENTITY, document), + entityCreationService.byString("Cell42", TYPE_2, EntityType.ENTITY, document), + + entityCreationService.byString("Cell23", TYPE_3, EntityType.ENTITY, document), + entityCreationService.byString("Cell53", TYPE_3, EntityType.ENTITY, document), + + entityCreationService.byString("Cell14", TYPE_4, EntityType.ENTITY, document), + entityCreationService.byString("Cell34", TYPE_4, EntityType.ENTITY, document)) + .stream() + .flatMap(Function.identity()) + .collect(Collectors.toSet()); + + if (DRAW_FILE) { + File file = new File("/tmp/" + Path.of(fileName).getFileName().toString()); + storageService.downloadTo(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.VIEWER_DOCUMENT), + file); + ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null); + + var visualizationsOnPage = EntityVisualizationUtility.createVisualizationsOnPage(document.getEntities(), Color.MAGENTA); + + viewerDocumentService.addVisualizationsOnPage(file, + file, + Visualizations.builder() + .layer(ENTITY_LAYER) + .visualizationsOnPages(visualizationsOnPage) + .layerVisibilityDefaultValue(true) + .build()); + } + + } + + + @Test + public void testStreamEntitiesWhereRowContainsEntitiesOfType() { + + int type_2_count = 
table.getEntitiesOfType(TYPE_2).size(); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfType(List.of(TYPE_1)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfType(List.of(TYPE_1, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(2, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(0, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_3)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(0, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_3, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of()) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(3, + table.streamTextEntitiesInRow(1) + .count()); + + assertEquals(2, + table.streamTextEntitiesInRow(4) + .count()); + + assertEquals(5, + table.streamTextEntitiesInCol(1) + .count()); + + assertEquals(3, + table.streamTextEntitiesInRow(3) + .count()); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java index 128849f7..75e0a12c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java @@ -38,6 +38,7 @@ import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; @@ -58,7 +59,6 @@ import com.iqser.red.service.redaction.v1.server.Application; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; @@ -127,15 +127,15 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - 
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); @@ -155,29 +155,40 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { String testEntityValue1 = "Desiree"; String testEntityValue2 = "Melanie"; EntityLog redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count()); - assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count()); + assertEquals(2, + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue1)) + .count()); + assertEquals(2, + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue2)) + .count()); Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID)); String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. 
Reference No 12345 Lorem ipsum."; - entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().get(); + entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document) + .findFirst() + .get(); String idToResize = redactionLog.getEntityLogEntry() .stream() .filter(entry -> entry.getValue().equals(testEntityValue1)) .max(Comparator.comparingInt(EntityLogEntry::getStartOffset)) - .get() - .getId(); + .get().getId(); ManualRedactions manualRedactions = new ManualRedactions(); - manualRedactions.getResizeRedactions().add(ManualResizeRedaction.builder() - .annotationId(idToResize) - .value(expandedEntityKeyword) - .positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(), - Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build())) - .addToAllDossiers(false) - .updateDictionary(false) - .requestDate(OffsetDateTime.now()) - .build()); + manualRedactions.getResizeRedactions() + .add(ManualResizeRedaction.builder() + .annotationId(idToResize) + .fileId(TEST_FILE_ID) + .value(expandedEntityKeyword) + .positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(), + Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build())) + .addToAllDossiers(false) + .updateDictionary(false) + .requestDate(OffsetDateTime.now()) + .build()); request.setManualRedactions(manualRedactions); analyzeService.reanalyze(request); @@ -188,21 +199,32 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) { fileOutputStream.write(annotateResponse.getDocument()); } - EntityLogEntry resizedEntry = redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().get(); + EntityLogEntry 
resizedEntry = redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(expandedEntityKeyword)) + .findFirst() + .get(); assertEquals(idToResize, resizedEntry.getId()); - assertEquals(1, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count()); assertEquals(1, - redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED)).count()); + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue1)) + .count()); + assertEquals(1, + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED)) + .count()); } private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) { return new com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle((float) rectangle2D.getMaxX(), - (float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(), - (float) rectangle2D.getWidth(), - -(float) rectangle2D.getHeight(), - pageNumber); + (float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(), + (float) rectangle2D.getWidth(), + -(float) rectangle2D.getHeight(), + pageNumber); } @@ -219,10 +241,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build())); manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Something") - .build())); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .legalBasis("Something") + .build())); ManualRedactionEntry manualRedactionEntry = 
new ManualRedactionEntry(); manualRedactionEntry.setAnnotationId(manualAddId); @@ -232,7 +254,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { manualRedactionEntry.setValue("O'Loughlin C.K."); manualRedactionEntry.setReason("Manual Redaction"); manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(), - Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); + Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); AnalyzeRequest request = uploadFileToStorage(pdfFile); request.setManualRedactions(manualRedactions); @@ -242,11 +264,11 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { manualRedactions.getEntriesToAdd().add(manualRedactionEntry); manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build())); manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Manual Legal Basis Change") - .requestDate(OffsetDateTime.now()) - .build()))); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .legalBasis("Manual Legal Basis Change") + .requestDate(OffsetDateTime.now()) + .build()))); analyzeService.reanalyze(request); @@ -295,7 +317,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { fileOutputStream.write(annotateResponse.getDocument()); } long end = System.currentTimeMillis(); - var optionalEntry = redactionLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findAny(); + var optionalEntry = redactionLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> 
entityLogEntry.getId().equals(manualAddId)) + .findAny(); assertTrue(optionalEntry.isPresent()); assertEquals(2, optionalEntry.get().getContainingNodeId().size()); // 2 is the depth of the table instead of the table cell System.out.println("duration: " + (end - start)); @@ -318,6 +343,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { .filter(entry -> entry.getValue().equals("Oxford University Press")) .findFirst() .get(); + assertFalse(oxfordUniversityPress.getEngines().contains(Engine.MANUAL)); var asyaLyon = redactionLog.getEntityLogEntry() .stream() @@ -344,9 +370,9 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { EntityLog redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); assertFalse(redactionLog2.getEntityLogEntry() - .stream() - .filter(entry -> entry.getType().equals("published_information")) - .anyMatch(entry -> entry.getValue().equals("Oxford University Press"))); + .stream() + .filter(entry -> entry.getType().equals("published_information")) + .anyMatch(entry -> entry.getValue().equals("Oxford University Press"))); var oxfordUniversityPressRecategorized = redactionLog2.getEntityLogEntry() .stream() @@ -364,6 +390,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { assertEquals(asyaLyon2.getState(), EntryState.APPLIED); assertEquals(1, oxfordUniversityPressRecategorized.getManualChanges().size()); + assertTrue(oxfordUniversityPressRecategorized.getEngines().contains(Engine.MANUAL)); } @@ -379,15 +406,15 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { String annotationId = "testAnnotationId"; manualRedactions.setEntriesToAdd(Set.of(ManualRedactionEntry.builder() - .annotationId(annotationId) - .requestDate(OffsetDateTime.now()) - .type("manual") - .value("Expand to Hint Clarissa’s Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be 
annotated") - .positions(List.of(// - new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), // - new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), // - new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) // - .build())); + .annotationId(annotationId) + .requestDate(OffsetDateTime.now()) + .type("manual") + .value("Expand to Hint Clarissa’s Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated") + .positions(List.of(// + new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), // + new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), // + new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) // + .build())); ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() .annotationId(annotationId) .requestDate(OffsetDateTime.now()) @@ -401,10 +428,58 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { analyzeService.reanalyze(request); EntityLog entityLog = redactionStorageService.getEntityLog(request.getDossierId(), request.getFileId()); - EntityLogEntry entityLogEntry = entityLog.getEntityLogEntry().stream().filter(entry -> entry.getId().equals(annotationId)).findFirst().orElseThrow(); + EntityLogEntry entityLogEntry = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getId().equals(annotationId)) + .findFirst() + .orElseThrow(); assertEquals("Expand to Hint", entityLogEntry.getValue()); assertEquals(1, entityLogEntry.getPositions().size()); - assertEquals(ManualRedactionType.RESIZE, entityLogEntry.getManualChanges().get(entityLogEntry.getManualChanges().size() - 1).getManualRedactionType()); + assertEquals(ManualRedactionType.RESIZE, + entityLogEntry.getManualChanges() + .get(entityLogEntry.getManualChanges().size() - 1).getManualRedactionType()); + assertTrue(entityLogEntry.getEngines().contains(Engine.MANUAL)); + } + + + @Test + @SneakyThrows + public void 
testAddEngineManualToResizeDictionaryEntry() { + + String filePath = "files/new/crafted document.pdf"; + AnalyzeRequest request = uploadFileToStorage(filePath); + analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); + AnalyzeResult result = analyzeService.analyze(request); + ManualRedactions manualRedactions = new ManualRedactions(); + + EntityLog entityLog = redactionStorageService.getEntityLog(request.getDossierId(), request.getFileId()); + var dictionaryEntry = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.isDictionaryEntry() || entry.isDossierDictionaryEntry()) + .findFirst() + .get(); + ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() + .annotationId(dictionaryEntry.getId()) + .requestDate(OffsetDateTime.now()) + .value("Image") + .positions(List.of(new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 1))) + .updateDictionary(true) + .build(); + manualRedactions.setResizeRedactions(Set.of(manualResizeRedaction)); + request.setManualRedactions(manualRedactions); + + analyzeService.reanalyze(request); + + entityLog = redactionStorageService.getEntityLog(request.getDossierId(), request.getFileId()); + EntityLogEntry entityLogEntry = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getId().equals(dictionaryEntry.getId())) + .findFirst() + .orElseThrow(); + assertEquals(ManualRedactionType.RESIZE_IN_DICTIONARY, + entityLogEntry.getManualChanges() + .get(entityLogEntry.getManualChanges().size() - 1).getManualRedactionType()); + assertTrue(entityLogEntry.getEngines().contains(Engine.MANUAL)); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java index 860f4d1b..ce91c661 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java @@ -32,18 +32,33 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { public void manualResizeRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); Set biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document) .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - TextEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); + TextEntity biggerEntity = biggerEntities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() .annotationId(initialId) + .fileId(TEST_FILE_ID) + .user("user") .value(biggerEntity.getValue()) - .positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage().get(0))) + 
.positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage() + .get(0))) .requestDate(OffsetDateTime.now()) .updateDictionary(false) .build(); @@ -55,8 +70,13 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { assertTrue(Sets.difference(new HashSet<>(biggerEntity.getIntersectingNodes()), new HashSet<>(entity.getIntersectingNodes())).isEmpty()); assertEquals(biggerEntity.getPages(), entity.getPages()); assertEquals(biggerEntity.getValue(), entity.getValue()); - assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); - assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage().get(0).getRectanglePerLine(), entity.getPositionsOnPagePerPage().get(0).getRectanglePerLine()); + assertEquals(initialId, + entity.getPositionsOnPagePerPage() + .get(0).getId()); + assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage() + .get(0).getRectanglePerLine(), + entity.getPositionsOnPagePerPage() + .get(0).getRectanglePerLine()); assertTrue(entity.resized()); } @@ -65,12 +85,25 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { public void manualForceRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); - ManualForceRedaction manualForceRedaction = 
ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); + ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() + .annotationId(initialId) + .fileId(TEST_FILE_ID) + .user("user") + .legalBasis("Something") + .requestDate(OffsetDateTime.now()) + .build(); doAnalysis(document, List.of(manualForceRedaction)); @@ -78,8 +111,12 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals("Something", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); - assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertEquals("Something", + entity.getManualOverwrite().getLegalBasis() + .orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals(initialId, + entity.getPositionsOnPagePerPage() + .get(0).getId()); assertFalse(entity.removed()); assertTrue(entity.hasManualChanges()); assertTrue(entity.applied()); @@ -90,17 +127,26 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { public void manualIDRemovalTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId 
= entity.getPositionsOnPagePerPage().get(0).getId(); - IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); + IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).user("user").build(); doAnalysis(document, List.of(idRemoval)); assertEquals("David Ksenia", entity.getValue()); - assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertEquals(initialId, + entity.getPositionsOnPagePerPage() + .get(0).getId()); assertTrue(entity.ignored()); } @@ -109,13 +155,25 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { public void manualIDRemovalButAlsoForceRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); - IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build(); - ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); + ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() + .annotationId(initialId) + 
.legalBasis("Something") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .user("user") + .build(); doAnalysis(document, List.of(manualForceRedaction)); @@ -123,7 +181,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertEquals(initialId, + entity.getPositionsOnPagePerPage() + .get(0).getId()); assertFalse(entity.removed()); assertFalse(entity.ignored()); } @@ -131,7 +191,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { private void assertRectanglesAlmostEqual(Collection rects1, Collection rects2) { - if (rects1.stream().allMatch(rect1 -> rects2.stream().anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) { + if (rects1.stream() + .allMatch(rect1 -> rects2.stream() + .anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) { return; } // use this for nice formatting of error message @@ -143,15 +205,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { double tolerance = 1e-1; return Math.abs(r1.getX() - r2.getX()) < tolerance &&// - Math.abs(r1.getY() - r2.getY()) < tolerance &&// - Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&// - Math.abs(r1.getHeight() - r2.getHeight()) < tolerance; + Math.abs(r1.getY() - r2.getY()) < tolerance &&// + Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&// + Math.abs(r1.getHeight() - r2.getHeight()) < tolerance; } private static List toAnnotationRectangles(PositionOnPage positionsOnPage) { - return positionsOnPage.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())).toList(); + return positionsOnPage.getRectanglePerLine() + .stream() + .map(rectangle2D -> toAnnotationRectangle(rectangle2D, 
positionsOnPage.getPage().getNumber())) + .toList(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java index 85d8311c..dbf0a1c4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java @@ -43,7 +43,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { OffsetDateTime start = OffsetDateTime.now(); String reason = "whatever"; Document document = buildGraphNoImages("files/new/crafted document.pdf"); - List entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document).peek(e -> e.apply("T.0.0", reason)).toList(); + List entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document) + .peek(e -> e.apply("T.0.0", reason)) + .toList(); assertFalse(entities.isEmpty()); TextEntity entity = entities.get(0); assertTrue(entity.active()); @@ -52,10 +54,11 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.resized()); assertFalse(entity.ignored()); assertEquals("n-a", entity.getMatchedRule().getLegalBasis()); - String annotationId = entity.getPositionsOnPagePerPage().get(0).getId(); + String annotationId = entity.getPositionsOnPagePerPage() + .get(0).getId(); // remove first - IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build(); + IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).user("user").annotationId(annotationId).build(); 
entity.getManualOverwrite().addChange(removal); assertTrue(entity.ignored()); assertFalse(entity.applied()); @@ -65,6 +68,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { ManualForceRedaction forceRedaction = ManualForceRedaction.builder() .requestDate(start.plusSeconds(1)) .fileId(TEST_FILE_ID) + .user("user") .annotationId(annotationId) .legalBasis("coolio") .build(); @@ -73,10 +77,12 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.ignored()); assertFalse(entity.removed()); assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions()); - assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals("coolio", + entity.getManualOverwrite().getLegalBasis() + .orElse(entity.getMatchedRule().getLegalBasis())); // remove again - IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).build(); + IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).user("user").build(); entity.getManualOverwrite().addChange(removal2); assertTrue(entity.ignored()); assertFalse(entity.applied()); @@ -86,6 +92,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { ManualForceRedaction forceRedaction2 = ManualForceRedaction.builder() .requestDate(start.plusSeconds(2)) .fileId(TEST_FILE_ID) + .user("user") .annotationId(annotationId) .legalBasis("coolio") .build(); @@ -93,7 +100,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertTrue(entity.ignored()); assertFalse(entity.applied()); assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override", - entity.buildReasonWithManualChangeDescriptions()); + 
entity.buildReasonWithManualChangeDescriptions()); String legalBasis = "Yeah"; String section = "Some random section!"; @@ -103,6 +110,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { .annotationId(annotationId) .requestDate(start.plusSeconds(4)) .section(section) + .fileId(TEST_FILE_ID) .user("peter") .value(value) .build(); @@ -110,16 +118,32 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertTrue(entity.ignored()); assertFalse(entity.applied()); assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override, legal basis was manually changed", - entity.buildReasonWithManualChangeDescriptions()); - assertEquals(value, entity.getManualOverwrite().getValue().orElse(entity.getValue())); - assertEquals(legalBasis, entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); - assertEquals(section, entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString())); + entity.buildReasonWithManualChangeDescriptions()); + assertEquals(value, + entity.getManualOverwrite().getValue() + .orElse(entity.getValue())); + assertEquals(legalBasis, + entity.getManualOverwrite().getLegalBasis() + .orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals(section, + entity.getManualOverwrite().getSection() + .orElse(entity.getDeepestFullyContainingNode().toString())); - ManualRecategorization imageRecategorizationRequest = ManualRecategorization.builder().type("type").requestDate(start.plusSeconds(5)).annotationId(annotationId).build(); + ManualRecategorization imageRecategorizationRequest = ManualRecategorization.builder() + .type("type") + .requestDate(start.plusSeconds(5)) + .fileId(TEST_FILE_ID) + .user("user") + .annotationId(annotationId) + .build(); entity.getManualOverwrite().addChange(imageRecategorizationRequest); - 
assertTrue(entity.getManualOverwrite().getRecategorized().isPresent()); - assertTrue(entity.getManualOverwrite().getRecategorized().get()); - assertEquals("type", entity.getManualOverwrite().getType().orElse(entity.type())); + assertTrue(entity.getManualOverwrite().getRecategorized() + .isPresent()); + assertTrue(entity.getManualOverwrite().getRecategorized() + .get()); + assertEquals("type", + entity.getManualOverwrite().getType() + .orElse(entity.type())); } @@ -129,7 +153,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { OffsetDateTime start = OffsetDateTime.now(); String reason = "whatever"; Document document = buildGraphNoImages("files/new/crafted document.pdf"); - List entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document).peek(e -> e.apply("T.0.0", reason)).toList(); + List entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document) + .peek(e -> e.apply("T.0.0", reason)) + .toList(); assertFalse(entities.isEmpty()); TextEntity entity = entities.get(0); assertTrue(entity.active()); @@ -138,10 +164,11 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.resized()); assertFalse(entity.ignored()); assertEquals("n-a", entity.getMatchedRule().getLegalBasis()); - String annotationId = entity.getPositionsOnPagePerPage().get(0).getId(); + String annotationId = entity.getPositionsOnPagePerPage() + .get(0).getId(); // remove first - IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build(); + IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).user("user").build(); entity.getManualOverwrite().addChange(removal); assertTrue(entity.ignored()); assertFalse(entity.applied()); @@ -152,6 +179,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { .requestDate(start.plusSeconds(1)) 
.fileId(TEST_FILE_ID) .annotationId(annotationId) + .user("user") .legalBasis("coolio") .build(); entity.getManualOverwrite().addChange(forceRedaction); @@ -159,7 +187,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.ignored()); assertFalse(entity.removed()); assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions()); - assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals("coolio", + entity.getManualOverwrite().getLegalBasis() + .orElse(entity.getMatchedRule().getLegalBasis())); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java index b10c8a6a..b6a1efcb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java @@ -84,7 +84,7 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { public void testFoundManualAddRedactionAndRemovedHasStateRemoved() { DocumentAndEntity context = createFoundManualRedaction(); - IdRemoval removal = IdRemoval.builder().requestDate(OffsetDateTime.now()).build(); + IdRemoval removal = IdRemoval.builder().annotationId("123").user("user").fileId(TEST_FILE_ID).requestDate(OffsetDateTime.now()).build(); context.entity().getManualOverwrite().addChange(removal); assertTrue(context.entity().removed()); } @@ -95,7 +95,7 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { public void 
testNotFoundManualAddRedactionAndRemovedHasStateRemoved() { DocumentAndEntity context = createNotFoundManualRedaction(); - IdRemoval removal = IdRemoval.builder().requestDate(OffsetDateTime.now()).build(); + IdRemoval removal = IdRemoval.builder().fileId(TEST_FILE_ID).user("user").annotationId("123").requestDate(OffsetDateTime.now()).build(); context.entity().getManualOverwrite().addChange(removal); assertTrue(context.entity().removed()); } @@ -108,8 +108,11 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { String value = "To: Syngenta Ltd. Jealott’s Hill"; String type = DICTIONARY_AUTHOR; ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder() + .annotationId("123") .type(type) .value(value) + .user("user") + .fileId(TEST_FILE_ID) .reason("reason") .legalBasis("n-a") .section("n-a") @@ -122,17 +125,20 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { assertTrue(document.getEntities().isEmpty()); - List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(), - document, - TEST_DOSSIER_TEMPLATE_ID); + List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder() + .entriesToAdd(Set.of( + manualRedactionEntry)) + .build(), + document, + TEST_DOSSIER_TEMPLATE_ID); assertEquals(1, notFoundManualEntities.size()); assertTrue(document.getEntities().isEmpty()); List redactionLogEntries = entityLogCreatorService.createInitialEntityLog(new AnalyzeRequest(), - document, - notFoundManualEntities, - new DictionaryVersion(), - 0L).getEntityLogEntry(); + document, + notFoundManualEntities, + new DictionaryVersion(), + 0L).getEntityLogEntry(); assertEquals(1, redactionLogEntries.size()); assertEquals(value, redactionLogEntries.get(0).getValue()); @@ -146,7 +152,8 @@ public class PrecursorEntityTest extends 
BuildDocumentIntegrationTest { Document document = buildGraph("files/new/VV-919901.pdf"); EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - List tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList(); + List tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document) + .toList(); assertFalse(tempEntities.isEmpty()); var tempEntity = tempEntities.get(0); List positions = tempEntity.getPositionsOnPagePerPage() @@ -158,8 +165,11 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder() .type("manual") + .annotationId("123") .value(tempEntity.getValue()) .reason("reason") + .user("user") + .fileId(TEST_FILE_ID) .legalBasis("n-a") .section(tempEntity.getDeepestFullyContainingNode().toString()) .rectangle(false) @@ -172,21 +182,28 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { tempEntity.removeFromGraph(); assertTrue(document.getEntities().isEmpty()); - List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(), - document, - TEST_DOSSIER_TEMPLATE_ID); + List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder() + .entriesToAdd(Set.of( + manualRedactionEntry)) + .build(), + document, + TEST_DOSSIER_TEMPLATE_ID); assertTrue(notFoundManualEntities.isEmpty()); assertEquals(1, document.getEntities().size()); - return new DocumentAndEntity(document, document.getEntities().stream().findFirst().get()); + return new DocumentAndEntity(document, + document.getEntities() + .stream() + .findFirst() + .get()); } public static Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) { return new 
Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())), - (float) rectangle2D.getWidth(), - -(float) rectangle2D.getHeight(), - pageNumber); + (float) rectangle2D.getWidth(), + -(float) rectangle2D.getHeight(), + pageNumber); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java index 8eeba13c..d60b1c9f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java @@ -8,6 +8,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.time.OffsetDateTime; import java.util.List; import java.util.Optional; import java.util.Set; @@ -34,7 +35,6 @@ import org.springframework.test.context.junit.jupiter.SpringExtension; import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; @@ -84,6 +84,7 @@ public class UnprocessedChangesServiceTest 
extends AbstractRedactionIntegrationT @SpyBean RabbitTemplate rabbitTemplate; + @BeforeEach public void stubClients() { @@ -101,21 +102,22 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); } + @Test @SneakyThrows public void testManualSurroundingText() { @@ -125,10 +127,20 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(355.53775f).topLeftY(266.1895f).width(29.32224f).height(10.048125f).page(1).build()), "AOEL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + 
.topLeftX(355.53775f) + .topLeftY(266.1895f) + .width(29.32224f) + .height(10.048125f) + .page(1) + .build()), + "AOEL"); var notFoundId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry2 = prepareManualRedactionEntry(notFoundId, List.of(Rectangle.builder().topLeftX(1f).topLeftY(1f).width(1f).height(1f).page(1).build()), "Random"); + ManualRedactionEntry manualRedactionEntry2 = prepareManualRedactionEntry(notFoundId, + List.of(Rectangle.builder().topLeftX(1f).topLeftY(1f).width(1f).height(1f).page(1).build()), + "Random"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); manualRedactions.getEntriesToAdd().add(manualRedactionEntry2); @@ -147,30 +159,43 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertFalse(unprocessedManualEntities.isEmpty()); assertEquals(unprocessedManualEntities.size(), 2); - Optional optionalUnprocessedManualEntity = unprocessedManualEntities.stream().filter(manualEntity -> manualEntity.getAnnotationId().equals(aoelId)).findFirst(); + Optional optionalUnprocessedManualEntity = unprocessedManualEntities.stream() + .filter(manualEntity -> manualEntity.getAnnotationId().equals(aoelId)) + .findFirst(); assertTrue(optionalUnprocessedManualEntity.isPresent()); UnprocessedManualEntity unprocessedManualEntity = optionalUnprocessedManualEntity.get(); assertEquals(unprocessedManualEntity.getTextBefore(), "was above the "); assertEquals(unprocessedManualEntity.getTextAfter(), " without PPE (34%"); assertEquals(unprocessedManualEntity.getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(unprocessedManualEntity.getPositions().get(0).x(), 355.53775f); - assertEquals(unprocessedManualEntity.getPositions().get(0).y(), 266.49002f); - assertEquals(unprocessedManualEntity.getPositions().get(0).w(), 29.322266f); - assertEquals(unprocessedManualEntity.getPositions().get(0).h(), 11.017679f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).x(), 
355.53775f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).y(), 266.49002f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).w(), 29.322266f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).h(), 11.017679f); - Optional optionalNotFoundUnprocessedManualEntity = unprocessedManualEntities.stream().filter(manualEntity -> manualEntity.getAnnotationId().equals(notFoundId)).findFirst(); + Optional optionalNotFoundUnprocessedManualEntity = unprocessedManualEntities.stream() + .filter(manualEntity -> manualEntity.getAnnotationId().equals(notFoundId)) + .findFirst(); assertTrue(optionalNotFoundUnprocessedManualEntity.isPresent()); UnprocessedManualEntity unprocessedNotFoundManualEntity = optionalNotFoundUnprocessedManualEntity.get(); assertEquals(unprocessedNotFoundManualEntity.getTextBefore(), ""); assertEquals(unprocessedNotFoundManualEntity.getTextAfter(), ""); assertEquals(unprocessedNotFoundManualEntity.getSection(), ""); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getPageNumber(), 1); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[0], 1f); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[1], 1f); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[2], 1f); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[3], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getPageNumber(), 1); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[0], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[1], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[2], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[3], 1f); analyzeService.reanalyze(request); @@ -190,10 +215,14 @@ public class 
UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertEquals(unprocessedManualEntities.get(0).getTextAfter(), " without PPE (34%"); assertEquals(unprocessedManualEntities.get(0).getTextBefore(), "to EFSA guidance "); assertEquals(unprocessedManualEntities.get(0).getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).h(), positions.get(0).getHeight()); } @@ -205,13 +234,37 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); var cormsId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry2 = prepareManualRedactionEntry(cormsId, 
List.of(Rectangle.builder().topLeftX(129.86f).topLeftY(505.7295f).width(35.9904f).height(10.048125f).page(1).build()), "CoRMS"); + ManualRedactionEntry manualRedactionEntry2 = prepareManualRedactionEntry(cormsId, + List.of(Rectangle.builder() + .topLeftX(129.86f) + .topLeftY(505.7295f) + .width(35.9904f) + .height(10.048125f) + .page(1) + .build()), + "CoRMS"); var a9Id = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry3 = prepareManualRedactionEntry(a9Id, List.of(Rectangle.builder().topLeftX(140.1096f).topLeftY(291.5095f).width(37.84512f).height(10.048125f).page(1).build()), "A9396G"); + ManualRedactionEntry manualRedactionEntry3 = prepareManualRedactionEntry(a9Id, + List.of(Rectangle.builder() + .topLeftX(140.1096f) + .topLeftY(291.5095f) + .width(37.84512f) + .height(10.048125f) + .page(1) + .build()), + "A9396G"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry3); manualRedactions.getEntriesToAdd().add(manualRedactionEntry2); @@ -238,35 +291,53 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertFalse(unprocessedManualEntities.isEmpty()); assertEquals(unprocessedManualEntities.size(), 3); - var resizedAoel = unprocessedManualEntities.stream().filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(aoelId)).findFirst(); + var resizedAoel = unprocessedManualEntities.stream() + .filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(aoelId)) + .findFirst(); assertTrue(resizedAoel.isPresent()); assertEquals(resizedAoel.get().getTextAfter(), " (max. 
43% of"); assertEquals(resizedAoel.get().getTextBefore(), "is below the "); assertEquals(resizedAoel.get().getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(resizedAoel.get().getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(resizedAoel.get().getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(resizedAoel.get().getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(resizedAoel.get().getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(resizedAoel.get().getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(resizedAoel.get().getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(resizedAoel.get().getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(resizedAoel.get().getPositions() + .get(0).h(), positions.get(0).getHeight()); - var cormsResized = unprocessedManualEntities.stream().filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(cormsId)).findFirst(); + var cormsResized = unprocessedManualEntities.stream() + .filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(cormsId)) + .findFirst(); assertTrue(cormsResized.isPresent()); assertEquals(cormsResized.get().getTextAfter(), " a NOAEL of"); assertEquals(cormsResized.get().getTextBefore(), "mg/kg bw/d. 
Furthermore "); assertEquals(cormsResized.get().getSection(), "[0, 3]: Paragraph: The Co-RMS indicated the"); - assertEquals(cormsResized.get().getPositions().get(0).x(), positions2.get(0).getTopLeftX()); - assertEquals(cormsResized.get().getPositions().get(0).y(), positions2.get(0).getTopLeftY()); - assertEquals(cormsResized.get().getPositions().get(0).w(), positions2.get(0).getWidth()); - assertEquals(cormsResized.get().getPositions().get(0).h(), positions2.get(0).getHeight()); + assertEquals(cormsResized.get().getPositions() + .get(0).x(), positions2.get(0).getTopLeftX()); + assertEquals(cormsResized.get().getPositions() + .get(0).y(), positions2.get(0).getTopLeftY()); + assertEquals(cormsResized.get().getPositions() + .get(0).w(), positions2.get(0).getWidth()); + assertEquals(cormsResized.get().getPositions() + .get(0).h(), positions2.get(0).getHeight()); - var a9Resized = unprocessedManualEntities.stream().filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(a9Id)).findFirst(); + var a9Resized = unprocessedManualEntities.stream() + .filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(a9Id)) + .findFirst(); assertTrue(a9Resized.isPresent()); assertEquals(a9Resized.get().getTextAfter(), " were obtained from"); assertEquals(a9Resized.get().getTextBefore(), "data for S"); assertEquals(a9Resized.get().getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(a9Resized.get().getPositions().get(0).x(), positions3.get(0).getTopLeftX()); - assertEquals(a9Resized.get().getPositions().get(0).y(), positions3.get(0).getTopLeftY()); - assertEquals(a9Resized.get().getPositions().get(0).w(), positions3.get(0).getWidth()); - assertEquals(a9Resized.get().getPositions().get(0).h(), positions3.get(0).getHeight()); + assertEquals(a9Resized.get().getPositions() + .get(0).x(), positions3.get(0).getTopLeftX()); + assertEquals(a9Resized.get().getPositions() + .get(0).y(), positions3.get(0).getTopLeftY()); 
+ assertEquals(a9Resized.get().getPositions() + .get(0).w(), positions3.get(0).getWidth()); + assertEquals(a9Resized.get().getPositions() + .get(0).h(), positions3.get(0).getHeight()); } @@ -277,7 +348,15 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); AnalyzeRequest request = uploadFileToStorage(pdfFile); @@ -301,10 +380,14 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertEquals(unprocessedManualEntities.get(0).getTextAfter(), " (max. 
43% of"); assertEquals(unprocessedManualEntities.get(0).getTextBefore(), "is below the "); assertEquals(unprocessedManualEntities.get(0).getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).h(), positions.get(0).getHeight()); } @@ -315,7 +398,15 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); AnalyzeRequest request = uploadFileToStorage(pdfFile); @@ -339,10 +430,14 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT 
assertEquals(unprocessedManualEntities.get(0).getTextAfter(), ", the same"); assertEquals(unprocessedManualEntities.get(0).getTextBefore(), "to set an "); assertEquals(unprocessedManualEntities.get(0).getSection(), "[0, 4]: Paragraph: With respect to the"); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).h(), positions.get(0).getHeight()); } @@ -353,7 +448,15 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); AnalyzeRequest request = uploadFileToStorage(pdfFile); @@ -377,25 +480,32 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT 
private static ManualResizeRedaction prepareManualSizeRedaction(String id, List positions, String value) { - ManualResizeRedaction manualResizeRedaction = new ManualResizeRedaction(); - manualResizeRedaction.setAnnotationId(id); - manualResizeRedaction.setPositions(positions); - manualResizeRedaction.setUpdateDictionary(false); - manualResizeRedaction.setAddToAllDossiers(false); - manualResizeRedaction.setValue(value); - return manualResizeRedaction; + return ManualResizeRedaction.builder() + .annotationId(id) + .fileId("fileId") + .user("user") + .positions(positions) + .updateDictionary(false) + .addToAllDossiers(false) + .value(value) + .requestDate(OffsetDateTime.now()) + .build(); } private static ManualRedactionEntry prepareManualRedactionEntry(String id, List positions, String value) { - ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); - manualRedactionEntry.setAnnotationId(id); - manualRedactionEntry.setFileId("fileId"); - manualRedactionEntry.setType("CBI_author"); - manualRedactionEntry.setValue(value); - manualRedactionEntry.setReason("Manual Redaction"); - manualRedactionEntry.setPositions(positions); - return manualRedactionEntry; + return ManualRedactionEntry.builder() + .annotationId(id) + .fileId("fileId") + .user("user") + .type("CBI_author") + .value(value) + .reason("Manual Redaction") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .positions(positions) + .build(); } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java new file mode 100644 index 00000000..7fae90bc --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java @@ -0,0 +1,61 @@ 
+package com.iqser.red.service.redaction.v1.server.utils; + +import java.awt.Color; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.pdfbox.cos.COSName; + +import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.knecon.fforesight.service.viewerdoc.ContentStreams; +import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle; +import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class EntityVisualizationUtility { + + public static final ContentStreams.Identifier ENTITY_LAYER = new ContentStreams.Identifier("Entities", COSName.getPDFName("KNECON_ENTITIES"), true); + + + public Map createVisualizationsOnPage(Collection entity, Color color) { + + Map visualizations = new HashMap<>(); + Set pages = entity.stream() + .map(TextEntity::getPages) + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + + pages.forEach(page -> visualizations.put(page.getNumber() - 1, buildVisualizationsOnPage(color, page))); + + return visualizations; + } + + + private static VisualizationsOnPage buildVisualizationsOnPage(Color color, Page page) { + + return VisualizationsOnPage.builder().coloredRectangles(getEntityRectangles(color, page)).build(); + } + + + private static List getEntityRectangles(Color color, Page page) { + + return page.getEntities() + .stream() + .map(TextEntity::getPositionsOnPagePerPage) + .flatMap(Collection::stream) + .filter(p -> p.getPage().equals(page)) + .map(PositionOnPage::getRectanglePerLine) + .flatMap(Collection::stream) + .map(r -> new ColoredRectangle(r, color, 1)) + .toList(); + } + +} diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java index f9eaa926..7d266acf 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java @@ -16,7 +16,6 @@ public class LayoutParsingRequestProvider { var originFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN); var tablesFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.TABLES); var imagesFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.IMAGE_INFO); - var sectionGridStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.SECTION_GRID); var structureFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.DOCUMENT_STRUCTURE); var textBlockFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.DOCUMENT_TEXT); var positionBlockFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.DOCUMENT_POSITION); @@ -33,7 +32,8 @@ public class LayoutParsingRequestProvider { .textBlockFileStorageId(textBlockFileStorageId) .positionBlockFileStorageId(positionBlockFileStorageId) .pageFileStorageId(pageFileStorageId) - 
.simplifiedTextStorageId(simplifiedTextStorageId).viewerDocumentStorageId(viewerDocumentStorageId) + .simplifiedTextStorageId(simplifiedTextStorageId) + .viewerDocumentStorageId(viewerDocumentStorageId) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 85083ed2..b3e45289 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -153,6 +153,32 @@ rule "CBI.7.1: Do not redact Names and Addresses if published information found $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end +rule "CBI.7.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.7.3: Do not redact PII if published information found in same table row" + when + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + then + $pii.skipWithReferences("CBI.7.3", "Published Information found in row", 
$table.getEntitiesOfTypeInSameRow("published_information", $pii)); + end + // Rule unit: CBI.9 rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" @@ -181,6 +207,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -209,6 +248,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", 
"Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -222,7 +287,19 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -230,12 +307,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -243,7 +320,19 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); dictionary.recommendEverywhere(entity); }); end @@ -268,7 +357,19 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\")" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -276,12 +377,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -289,7 +390,19 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING 
LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -298,45 +411,92 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) 
No 1049/2001 (Personal data)"); end // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + 
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.2 -rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" +rule "PII.2.0: Redact Phone and Fax by RegEx" + when + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -351,10 +511,10 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" +rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -369,34 +529,41 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.3 -rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" +rule "PII.3.0: Redact telephone numbers by RegEx" + when + $section: Section(matchesRegex("[+]\\d{1,}")) + then + 
entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" +rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non 
vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -422,9 +589,8 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -447,12 +613,24 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.5 -rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" +rule "PII.5.0: Redact line after contact information keywords reduced" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -462,10 +640,10 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: 
String() from List.of("Contact point:", @@ -475,12 +653,23 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -489,10 +678,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -501,12 +690,41 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value 
soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -521,10 +739,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -539,14 +757,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -562,7 +779,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: 
Section(containsStringIgnoreCase("producer of the plant protection") || @@ -577,27 +812,35 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -610,6 +853,14 @@ rule "PII.10.0: Redact study director abbreviation" .forEach(entity -> entity.redact("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); 
end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" @@ -654,49 +905,87 @@ rule "ETC.0.0: Purity Hint" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == 
ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == 
"confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end @@ -846,7 +1135,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@ -882,8 +1171,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -911,7 +1200,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == 
EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -924,7 +1213,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -936,7 +1225,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -949,7 +1238,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == 
EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -959,7 +1248,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -967,26 +1256,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger 
than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index ce417ea6..6c1e2519 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -207,7 +207,7 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule 
"CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), @@ -262,7 +262,7 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); }); @@ -336,6 +336,32 @@ rule "CBI.7.1: Do not redact Names and Addresses if published information found $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end +rule "CBI.7.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.7.3: Do not redact PII if published information found in same table row" + when + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + then + $pii.skipWithReferences("CBI.7.3", "Published Information found in row", 
$table.getEntitiesOfTypeInSameRow("published_information", $pii)); + end + // Rule unit: CBI.8 rule "CBI.8.0: Redacted because Section contains must_redact entity" @@ -361,11 +387,11 @@ rule "CBI.8.1: Redacted because table row contains must_redact entity" .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { entity.applyWithReferences( - "CBI.8.1", - "must_redact entity found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) - ); + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); }); end @@ -397,6 +423,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -425,6 +464,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as 
CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -438,7 +503,22 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" +rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) + TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from 
$table.streamHeaders().toList() + TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + then + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); + end + +rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -448,16 +528,15 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" +rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == 
"Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -470,13 +549,13 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" +rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() @@ -485,7 +564,7 @@ rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'V $authorCell: TableCell(row == $rowWithNo) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> authorEntity.skip("CBI.12.2", "Not redacted because it's row does not belong to a vertebrate study")); + .ifPresent(authorEntity -> authorEntity.skip("CBI.12.3", "Not redacted because it's row does not belong to a vertebrate study")); end @@ -503,9 +582,12 @@ rule "CBI.13.0: Ignore CBI Address recommendations" // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when + $section: Section(containsStringIgnoreCase("batches produced at")) $sponsorEntity: TextEntity(type() == "CBI_sponsor", textBefore.contains("batches 
produced at")) then $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + entityCreationService.byString("batches produced at", "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.14.0", "must_redact")); end @@ -526,10 +608,10 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end @@ -547,8 +629,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $residueKeyword: String() from List.of("determination of residues", "determination of total residues") $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) @@ -557,7 +639,19 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -565,12 +659,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -578,7 +672,19 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); dictionary.recommendEverywhere(entity); }); end @@ -636,7 +742,19 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\")" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -644,12 +762,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -657,7 +775,19 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING 
LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -704,53 +834,116 @@ rule "CBI.22.0: Redact Addresses in Reference Tables for vertebrate studies in n //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) 
No 1049/2001 (Personal data)"); end // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.1.2: Redact typoed Emails with indicator" +rule "PII.1.3: Redact typoed Emails with indicator" when $section: Section(containsString("@") || containsStringIgnoreCase("mail")) then - 
entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.1.4: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. 
\\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.2 -rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" +rule "PII.2.0: Redact Phone and Fax by RegEx" + when + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -765,10 +958,10 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" +rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -783,42 +976,49 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.2.2: Redact phone numbers without indicators" +rule "PII.2.3: Redact phone numbers without indicators" when $section: Section(containsString("+")) then entityCreationService.byRegex("(\\+[\\dO]{1,2} 
)(\\([\\dO]{1,3}\\))?[\\d\\-O ]{8,15}", "PII", EntityType.ENTITY, $section) - .forEach(entity -> entity.redact("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.2.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end // Rule unit: PII.3 -rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" +rule "PII.3.0: Redact telephone numbers by RegEx" + when + $section: Section(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" +rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} 
(\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -844,9 +1044,8 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -869,12 +1068,24 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.5 -rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" +rule "PII.5.0: Redact line after contact information keywords reduced" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -884,10 +1095,10 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: 
String() from List.of("Contact point:", @@ -897,12 +1108,23 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -911,10 +1133,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -923,12 +1145,41 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value 
soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -943,10 +1194,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -961,14 +1212,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -984,7 +1234,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: 
Section(containsStringIgnoreCase("producer of the plant protection") || @@ -999,27 +1267,43 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", 
"STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end @@ -1096,38 +1380,66 @@ rule "ETC.1.0: Redact Purity" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: 
Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end @@ -1155,13 +1467,23 @@ rule "ETC.4.2: Redact dossier dictionary entries" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - 
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end @@ -1416,7 +1738,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@ -1452,8 +1774,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1481,7 +1803,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by 
FALSE_POSITIVE"); @@ -1494,7 +1816,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1506,7 +1828,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1519,7 +1841,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ 
-1529,7 +1851,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -1537,26 +1859,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: 
TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 983a5e95..d2df3761 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1276,7 +1276,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by 
default.", $entity.legalBasis()); end @@ -1312,8 +1312,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1325,7 +1325,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1338,7 +1338,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when 
contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1350,7 +1350,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1363,7 +1363,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -1373,7 +1373,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); diff 
--git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 932bfdc2..340a01ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -188,7 +188,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@ -225,7 +225,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index ade6e8f6..514b2297 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -150,7 +150,7 @@ rule "CBI.4.0: Do not redact Names and Addresses 
if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), @@ -205,7 +205,7 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); }); @@ -236,11 +236,11 @@ rule "CBI.8.1: Redacted because table row contains must_redact entity" .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { entity.applyWithReferences( - "CBI.8.1", - "must_redact entity found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) - ); + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); }); end @@ -272,6 +272,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -285,7 +298,22 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" +rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) + TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() + TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from 
$table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + then + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); + end + +rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -295,16 +323,15 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" +rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -317,13 +344,13 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s entityCreationService.bySemanticNode($authorCell, "CBI_author", 
EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" +rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() @@ -332,16 +359,19 @@ rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'V $authorCell: TableCell(row == $rowWithNo) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> authorEntity.skip("CBI.12.2", "Not redacted because it's row does not belong to a vertebrate study")); + .ifPresent(authorEntity -> authorEntity.skip("CBI.12.3", "Not redacted because it's row does not belong to a vertebrate study")); end // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when + $section: Section(containsStringIgnoreCase("batches produced at")) $sponsorEntity: TextEntity(type() == "CBI_sponsor", textBefore.contains("batches produced at")) then $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + entityCreationService.byString("batches produced at", "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.14.0", "must_redact")); end @@ -362,10 +392,10 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end @@ -383,8 +413,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $residueKeyword: String() from List.of("determination of residues", "determination of total residues") $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) @@ -393,7 +423,19 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + 
then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -401,12 +443,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -414,7 +456,19 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); dictionary.recommendEverywhere(entity); }); end @@ -472,7 +526,19 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\")" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -480,12 +546,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -493,7 +559,19 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING 
LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -502,47 +580,76 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) 
No 1049/2001 (Personal data)"); end // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + 
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -568,9 +675,8 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -593,12 +699,23 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -607,10 +724,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: 
Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -619,12 +736,42 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", 
"Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -639,10 +786,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -657,14 +804,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> 
entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -680,7 +826,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + 
entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -695,27 +859,43 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", 
"STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end @@ -762,38 +942,66 @@ rule "ETC.1.0: Redact Purity" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: 
Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end @@ -807,13 +1015,23 @@ rule "ETC.4.0: Redact dossier dictionary entries" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - 
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end @@ -1006,7 +1224,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@ -1042,8 +1260,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1071,7 +1289,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by 
FALSE_POSITIVE"); @@ -1084,7 +1302,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1096,7 +1314,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1109,7 +1327,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ 
-1119,7 +1337,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -1127,26 +1345,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: 
TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index f270a177..aaebfcdb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -80,12 +80,12 @@ rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", 
dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -212,7 +212,7 @@ rule "MAN.3.1: Apply entity recategorization of same type" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@ -248,8 +248,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -277,7 +277,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", 
"remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -290,7 +290,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -302,7 +302,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -315,7 +315,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected 
by ENTITY"); retract($recommendation); @@ -325,7 +325,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -333,26 +333,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" 
salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl index db7b1379..a40618d5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl @@ -338,7 +338,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are 
applied by default.", $entity.legalBasis()); end @@ -374,8 +374,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -387,7 +387,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -400,7 +400,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION 
when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -412,7 +412,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -425,7 +425,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -435,7 +435,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -443,26 
+443,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > 
getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl index fb460944..2c299166 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl @@ -238,7 +238,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@ -274,8 +274,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", 
"remove Entity contained by Entity of same type"); retract($contained); @@ -303,7 +303,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -316,7 +316,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -328,7 +328,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then 
$entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -341,7 +341,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -351,7 +351,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -359,26 +359,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: 
TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal 
Examples/BasicTable.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf new file mode 100644 index 00000000..f692fccb Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index 8b28390c..ecdd6e65 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -449,14 +449,23 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - update($dossierRedaction); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not 
confidential"); + retract($dossierRedaction); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index c29f78a0..01244c8f 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -171,6 +171,7 @@ rule "CBI.3.1: Redacted because table row contains a vertebrate" }); end + rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" when $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) @@ -207,7 +208,7 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), @@ -262,7 +263,7 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); }); @@ -336,6 +337,32 @@ rule "CBI.7.1: Do not redact Names and Addresses if published information found $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end +rule "CBI.7.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.7.3: Do not redact PII if published information found in same table row" + when + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + then + $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); + end + // Rule unit: CBI.8 rule "CBI.8.0: Redacted because Section contains must_redact entity" @@ -361,11 +388,11 @@ rule "CBI.8.1: Redacted because table row contains must_redact entity" .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { entity.applyWithReferences( - "CBI.8.1", - "must_redact entity found", - 
"Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) - ); + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); }); end @@ -397,6 +424,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -425,6 +465,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity 
-> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -438,7 +504,22 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" +rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) + TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() + TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + then + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + 
.ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); + end + +rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -448,16 +529,15 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" +rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -470,13 +550,13 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + 
authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" +rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() @@ -485,7 +565,7 @@ rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'V $authorCell: TableCell(row == $rowWithNo) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> authorEntity.skip("CBI.12.2", "Not redacted because it's row does not belong to a vertebrate study")); + .ifPresent(authorEntity -> authorEntity.skip("CBI.12.3", "Not redacted because it's row does not belong to a vertebrate study")); end @@ -503,9 +583,12 @@ rule "CBI.13.0: Ignore CBI Address recommendations" // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when + $section: Section(containsStringIgnoreCase("batches produced at")) $sponsorEntity: TextEntity(type() == "CBI_sponsor", textBefore.contains("batches produced at")) then $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + entityCreationService.byString("batches produced at", "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.14.0", "must_redact")); end @@ -526,10 +609,10 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end @@ -547,8 +630,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $residueKeyword: String() from List.of("determination of residues", "determination of total residues") $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) @@ -557,7 +640,19 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + 
then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -565,12 +660,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -578,7 +673,19 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); dictionary.recommendEverywhere(entity); }); end @@ -636,7 +743,19 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\")" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -644,12 +763,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -657,11 +776,24 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING 
LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + + // Rule unit: CBI.21 rule "CBI.21.0: Redact short Authors section (non vertebrate study)" when @@ -703,53 +835,116 @@ rule "CBI.22.0: Redact Addresses in Reference Tables for vertebrate studies in n //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", 
dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.1.2: Redact typoed Emails with indicator" +rule "PII.1.3: Redact typoed Emails 
with indicator" when $section: Section(containsString("@") || containsStringIgnoreCase("mail")) then - entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.1.4: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. 
\\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.2 -rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" +rule "PII.2.0: Redact Phone and Fax by RegEx" + when + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -764,10 +959,10 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" +rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -782,41 +977,48 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.2.2: Redact phone numbers without indicators" +rule "PII.2.3: Redact phone numbers without indicators" when $section: Section(containsString("+")) then entityCreationService.byRegex("(\\+[\\dO]{1,2} 
)(\\([\\dO]{1,3}\\))?[\\d\\-O ]{8,15}", "PII", EntityType.ENTITY, $section) - .forEach(entity -> entity.redact("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.2.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end // Rule unit: PII.3 -rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" +rule "PII.3.0: Redact telephone numbers by RegEx" + when + $section: Section(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" +rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} 
(\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -842,9 +1044,8 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -867,12 +1068,24 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.5 -rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" +rule "PII.5.0: Redact line after contact information keywords reduced" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -882,10 +1095,10 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: 
String() from List.of("Contact point:", @@ -895,12 +1108,23 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -909,10 +1133,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -921,12 +1145,41 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value 
soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -941,10 +1194,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -959,14 +1212,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -982,7 +1234,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: 
Section(containsStringIgnoreCase("producer of the plant protection") || @@ -997,27 +1267,43 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", 
"STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end @@ -1104,38 +1390,66 @@ rule "ETC.1.0: Redact Purity" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: 
Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end @@ -1163,13 +1477,23 @@ rule "ETC.4.2: Redact dossier dictionary entries" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - 
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end @@ -1438,7 +1762,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@ -1473,8 +1797,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1502,7 +1826,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by 
FALSE_POSITIVE"); @@ -1515,7 +1839,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1527,7 +1851,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1540,19 +1864,17 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); 
end - -// Rule unit: X.5 rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATION" salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -1560,27 +1882,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end - -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - 
$higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length() > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl index 0be20320..8c1edfc3 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl @@ -1423,7 +1423,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end @@
-1458,8 +1458,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1471,7 +1471,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1484,7 +1484,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); 
retract($recommendation); @@ -1496,7 +1496,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1509,7 +1509,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -1521,7 +1521,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); diff --git 
a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java index abb7fcb6..5d78c6ad 100644 --- a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java @@ -26,8 +26,8 @@ public class RuleFileMigrationTest { // Put your redaction service drools paths and dossier-templates paths both RM and DM here static final List ruleFileDirs = List.of( "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", - "/home/kschuettler/iqser/fforesight/dossier-templates-v2/", - "/home/kschuettler/iqser/redaction/dossier-templates-v2/"); + "/home/kschuettler/iqser/fforesight/dossier-templates-v2", + "/home/kschuettler/iqser/redaction/dossier-templates-v2"); @Test @@ -36,7 +36,13 @@ public class RuleFileMigrationTest { void migrateAllEntityRules() { for (String ruleFileDir : ruleFileDirs) { - Files.walk(Path.of(ruleFileDir)).filter(this::isEntityRuleFile).map(Path::toFile).peek(System.out::println).forEach(RuleFileMigrator::migrateFile); + // Files.walk keeps directory handles open until the stream is closed, so scope it with try-with-resources. + try (var ruleFiles = Files.walk(Path.of(ruleFileDir))) { + ruleFiles.filter(this::isEntityRuleFile) + .map(Path::toFile) + .peek(System.out::println) + .forEach(RuleFileMigrator::migrateFile); + } } }