From a8ed18bde41f86275e0206022e583f84f18e8d45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Fri, 22 Sep 2023 10:08:25 +0200 Subject: [PATCH] RED-7307: make ManualEntities applied by default --- .../entity/ManualChangeOverwrite.java | 4 - .../service/ComponentLogCreatorService.java | 2 +- .../service/RedactionLogCreatorService.java | 3 +- .../document/ComponentCreationService.java | 101 +++++++++++++++++- .../document/ManualEntityCreationService.java | 15 +-- .../src/test/resources/drools/adama-pilot.drl | 5 +- .../drools/documine_flora_components.drl | 8 ++ 7 files changed, 119 insertions(+), 19 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/ManualChangeOverwrite.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/ManualChangeOverwrite.java index 76a21630..a6b0ee3e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/ManualChangeOverwrite.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/ManualChangeOverwrite.java @@ -108,10 +108,6 @@ public class ManualChangeOverwrite { type = recategorization.getType(); } - if (manualChange instanceof ManualRedactionEntry redactionEntry) { - legalBasis = redactionEntry.getLegalBasis(); - } - descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass())); } changed = false; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java index 4a7d65dc..9db4087a 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java @@ -33,7 +33,7 @@ public class ComponentLogCreatorService { private ComponentLogEntry buildComponentLogEntry(Component component) { return ComponentLogEntry.builder() - .value(component.getValue()) + .value(component.getValue()).matchedRule(component.getMatchedRule().toString()) .transformation(component.getTransformation()) .componentEntityReferences(toComponentEntityReferences(component.getReferences().stream().sorted(EntityComparators.start()).toList())) .build(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/RedactionLogCreatorService.java index 55773f59..32928912 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/RedactionLogCreatorService.java @@ -193,8 +193,7 @@ public class RedactionLogCreatorService { .isRecommendation(manualEntity.getEntityType().equals(EntityType.RECOMMENDATION)) .isFalsePositive(manualEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) || manualEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) .section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection())) - .sectionNumber(0) - .matchedRule("ManualRedaction") + .sectionNumber(0).matchedRule(manualEntity.getMatchedRule().getRuleIdentifier().toString()) .rectangle(manualEntity.isRectangle()) 
.isDictionaryEntry(manualEntity.isDictionaryEntry()) .isDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java index 51c3c41d..8b64f02d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java @@ -1,10 +1,15 @@ package com.iqser.red.service.redaction.v1.server.service.document; +import java.text.BreakIterator; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.OptionalInt; import java.util.Set; import java.util.stream.Collectors; @@ -41,6 +46,18 @@ public class ComponentCreationService { } + public void joiningFromFirstSectionOnly(String ruleIdentifier, String category, Collection entities) { + + joiningFromFirstSectionOnly(ruleIdentifier, category, entities, ", "); + } + + + public void joiningUniqueFromFirstSectionOnly(String ruleIdentifier, String category, Collection entities) { + + joiningUniqueFromFirstSectionOnly(ruleIdentifier, category, entities, ", "); + } + + public void joining(String ruleIdentifier, String category, Collection entities, String delimiter) { String transformation = String.format("Joining all values with '%s'", delimiter); @@ -49,9 +66,71 @@ public class ComponentCreationService { } + public void joiningFromFirstSectionOnly(String ruleIdentifier, String category, Collection 
entities, String delimiter) { + + List entitiesFromFirstSection = findEntitiesFromFirstSection(entities); + joining(ruleIdentifier, category, entitiesFromFirstSection, delimiter); + } + + + private static List findEntitiesFromFirstSection(Collection entities) { + + var entitiesBySection = entities.stream().collect(Collectors.groupingBy(Entity::getSectionNumber)); + OptionalInt firstSection = entitiesBySection.keySet().stream().mapToInt(Integer::intValue).min(); + if (firstSection.isEmpty()) { + return Collections.emptyList(); + } + return entitiesBySection.get(firstSection.getAsInt()); + } + + + public void joiningFromLongestSectionOnly(String ruleIdentifier, String category, Collection entities, String delimiter) { + + List entitiesFromLongestSection = findEntitiesFromLongestSection(entities); + joining(ruleIdentifier, category, entitiesFromLongestSection, delimiter); + } + + + public void joiningUniqueFromLongestSectionOnly(String ruleIdentifier, String category, Collection entities, String delimiter) { + + List entitiesFromLongestSection = findEntitiesFromLongestSection(entities); + joiningUnique(ruleIdentifier, category, entitiesFromLongestSection, delimiter); + } + + + private static List findEntitiesFromLongestSection(Collection entities) { + + var entitiesBySection = entities.stream().collect(Collectors.groupingBy(Entity::getSectionNumber)); + OptionalInt longestSection = entitiesBySection.entrySet() + .stream() + .sorted(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities).reversed()) + .mapToInt(Map.Entry::getKey) + .findFirst(); + + if (longestSection.isEmpty()) { + return Collections.emptyList(); + } + + return entitiesBySection.get(longestSection.getAsInt()); + } + + + private static int getTotalLengthOfEntities(Map.Entry> entry) { + + return entry.getValue().stream().mapToInt(Entity::getLength).sum(); + } + + + public void joiningUniqueFromFirstSectionOnly(String ruleIdentifier, String category, Collection entities, String 
delimiter) { + + List entitiesFromFirstSection = findEntitiesFromFirstSection(entities); + joiningUnique(ruleIdentifier, category, entitiesFromFirstSection, delimiter); + } + + public void joiningUnique(String ruleIdentifier, String category, Collection entities) { - joining(ruleIdentifier, category, entities, ", "); + joiningUnique(ruleIdentifier, category, entities, ", "); } @@ -63,6 +142,23 @@ public class ComponentCreationService { } + public void asSentences(String ruleIdentifier, String category, Collection entities) { + + if (entities.isEmpty()) { + return; + } + + for (Entity entity : entities) { + BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.ENGLISH); + iterator.setText(entity.getValue()); + int start = iterator.first(); + for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) { + create(ruleIdentifier, category, entity.getValue().substring(start, end).replaceAll("\\n", "").trim(), "Split into sentences", entity); + } + } + } + + public void convertDates(String ruleIdentifier, String category, Collection entities) { convertDates(ruleIdentifier, category, entities, "dd/MM/yyyy"); @@ -92,8 +188,7 @@ public class ComponentCreationService { kieSession.insert(Component.builder() .matchedRule(RuleIdentifier.fromString(ruleIdentifier)) .category(category) - .value(value) - .transformation(transformation).references(new LinkedList<>(references)).build()); + .value(value).transformation(transformation).references(new LinkedList<>(references)).build()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java index 001870e9..0fd6373f 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java @@ -49,7 +49,13 @@ public class ManualEntityCreationService { public List toRedactionEntity(RedactionLog redactionLog, SemanticNode node) { - List manualEntities = redactionLog.getRedactionLogEntry().stream().map(ManualEntity::fromRedactionLogEntry).toList(); + List manualEntities = redactionLog.getRedactionLogEntry().stream().map(ManualEntity::fromRedactionLogEntry).peek(manualEntity -> { + if (manualEntity.isApplied()) { + manualEntity.apply(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis()); + } else { + manualEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason()); + } + }).toList(); return toRedactionEntity(manualEntities, node); } @@ -59,6 +65,7 @@ public class ManualEntityCreationService { List manualEntities = manualRedactionEntries.stream() .filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary())) .map(ManualEntity::fromManualRedactionEntry) + .peek(manualEntity -> manualEntity.apply("MAN.5.0", "manual entries are applied by default", manualEntity.getLegalBasis())) .toList(); return toRedactionEntity(manualEntities, node); @@ -97,11 +104,7 @@ public class ManualEntityCreationService { TextEntity correctEntity = entityCreationService.forceByTextRange(closestTextRange, manualEntity.getType(), manualEntity.getEntityType(), node); - if (manualEntity.isApplied()) { - correctEntity.apply(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis()); - } else { - correctEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason()); - } + 
correctEntity.addMatchedRules(manualEntity.getMatchedRuleList()); correctEntity.setDictionaryEntry(manualEntity.isDictionaryEntry()); correctEntity.setDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry()); correctEntity.getManualOverwrite().addChanges(manualEntity.getManualOverwrite().getManualChangeLog()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl index d8b1054c..4a190ab0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl @@ -301,9 +301,8 @@ rule "DOC.4.1: study title on cover page between sections" ) then List startStrings = List.of("Study Title", "Study Title:", "Title", "Final Report", "Final Study Report", "Report 92 50 12 136"); - - - List stopStrings = List.of("Guideline", "Guidelines", "Study Identification", "Data Requirement", "Submitted", "Test Guideline", "Study Director", "Author", "Including:", "Laboratory Investigations", "Test Article", "HLS", "Official Journal"); + List stopStrings = List.of("Guideline", "Guidelines", "Study Identification", "Data Requirement", "Submitted", "Test Guideline", + "Study Director", "Author", "Including:", "Laboratory Investigations", "Test Article", "HLS", "Official Journal"); // too many false positives due to term in header and cover page stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Final Report", $page.getMainBodyTextBlock())); entityCreationService.shortestBetweenAnyString(startStrings, stopStrings, "title", EntityType.ENTITY, document).forEach(entity -> { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl index 6f4c71a6..e98d02fb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl @@ -203,6 +203,14 @@ rule "DefaultComponents.8.0: Certificate of analysis batch identification" componentCreationService.joiningUnique("DefaultComponents.8.0", "Batch_Number", $batchNumbers); end +rule "StudyConclusion.0.0: Study conclusion in first found section" + when + FileAttribute(label == "oecd_number", value == "425" || value == "430") + $studyConclusions: List() from collect(Entity(type == "study_conclusion")) + then + /* identifier mirrors this rule's name prefix, as DefaultComponents.8.0 does above */ componentCreationService.joiningUniqueFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $studyConclusions); + end + rule "DefaultComponents.999.0: Create components for all unmapped entities." salience -999