Merge branch 'RED-7307' into 'master'

RED-7307: make ManualEntities applied by default

Closes RED-7307

See merge request redactmanager/redaction-service!138
This commit is contained in:
Kilian Schüttler 2023-09-22 10:08:25 +02:00
commit a6b82f6813
7 changed files with 119 additions and 19 deletions

View File

@ -108,10 +108,6 @@ public class ManualChangeOverwrite {
type = recategorization.getType();
}
if (manualChange instanceof ManualRedactionEntry redactionEntry) {
legalBasis = redactionEntry.getLegalBasis();
}
descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass()));
}
changed = false;

View File

@ -33,7 +33,7 @@ public class ComponentLogCreatorService {
private ComponentLogEntry buildComponentLogEntry(Component component) {
return ComponentLogEntry.builder()
.value(component.getValue())
.value(component.getValue()).matchedRule(component.getMatchedRule().toString())
.transformation(component.getTransformation())
.componentEntityReferences(toComponentEntityReferences(component.getReferences().stream().sorted(EntityComparators.start()).toList()))
.build();

View File

@ -193,8 +193,7 @@ public class RedactionLogCreatorService {
.isRecommendation(manualEntity.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(manualEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) || manualEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection()))
.sectionNumber(0)
.matchedRule("ManualRedaction")
.sectionNumber(0).matchedRule(manualEntity.getMatchedRule().getRuleIdentifier().toString())
.rectangle(manualEntity.isRectangle())
.isDictionaryEntry(manualEntity.isDictionaryEntry())
.isDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry())

View File

@ -1,10 +1,15 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import java.text.BreakIterator;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.OptionalInt;
import java.util.Set;
import java.util.stream.Collectors;
@ -41,6 +46,18 @@ public class ComponentCreationService {
}
/**
 * Convenience overload of the four-argument {@code joiningFromFirstSectionOnly} that uses ", " as the delimiter.
 *
 * @param ruleIdentifier forwarded unchanged to the four-argument overload
 * @param category       forwarded unchanged to the four-argument overload
 * @param entities       forwarded unchanged to the four-argument overload
 */
public void joiningFromFirstSectionOnly(String ruleIdentifier, String category, Collection<Entity> entities) {

    final String defaultDelimiter = ", ";
    joiningFromFirstSectionOnly(ruleIdentifier, category, entities, defaultDelimiter);
}
/**
 * Convenience overload of the four-argument {@code joiningUniqueFromFirstSectionOnly} that uses ", " as the delimiter.
 *
 * @param ruleIdentifier forwarded unchanged to the four-argument overload
 * @param category       forwarded unchanged to the four-argument overload
 * @param entities       forwarded unchanged to the four-argument overload
 */
public void joiningUniqueFromFirstSectionOnly(String ruleIdentifier, String category, Collection<Entity> entities) {

    final String defaultDelimiter = ", ";
    joiningUniqueFromFirstSectionOnly(ruleIdentifier, category, entities, defaultDelimiter);
}
public void joining(String ruleIdentifier, String category, Collection<Entity> entities, String delimiter) {
String transformation = String.format("Joining all values with '%s'", delimiter);
@ -49,9 +66,71 @@ public class ComponentCreationService {
}
/**
 * Restricts the given entities to those selected by {@code findEntitiesFromFirstSection} and hands them to
 * {@code joining} with the supplied delimiter.
 *
 * @param ruleIdentifier forwarded unchanged to {@code joining}
 * @param category       forwarded unchanged to {@code joining}
 * @param entities       candidate entities; only those of the lowest section number are joined
 * @param delimiter      forwarded unchanged to {@code joining}
 */
public void joiningFromFirstSectionOnly(String ruleIdentifier, String category, Collection<Entity> entities, String delimiter) {

    joining(ruleIdentifier, category, findEntitiesFromFirstSection(entities), delimiter);
}
/**
 * Groups the entities by section number and returns the group with the smallest section number.
 *
 * @param entities entities to group
 * @return the entities sharing the lowest section number, or an empty list when {@code entities} is empty
 */
private static List<Entity> findEntitiesFromFirstSection(Collection<Entity> entities) {

    Map<Integer, List<Entity>> grouped = entities.stream().collect(Collectors.groupingBy(Entity::getSectionNumber));
    if (grouped.isEmpty()) {
        return Collections.emptyList();
    }
    Integer lowestSectionNumber = Collections.min(grouped.keySet());
    return grouped.get(lowestSectionNumber);
}
/**
 * Restricts the given entities to those selected by {@code findEntitiesFromLongestSection} and hands them to
 * {@code joining} with the supplied delimiter.
 *
 * @param ruleIdentifier forwarded unchanged to {@code joining}
 * @param category       forwarded unchanged to {@code joining}
 * @param entities       candidate entities; only those of the section with the greatest summed entity length are joined
 * @param delimiter      forwarded unchanged to {@code joining}
 */
public void joiningFromLongestSectionOnly(String ruleIdentifier, String category, Collection<Entity> entities, String delimiter) {

    joining(ruleIdentifier, category, findEntitiesFromLongestSection(entities), delimiter);
}
/**
 * Restricts the given entities to those selected by {@code findEntitiesFromLongestSection} and hands them to
 * {@code joiningUnique} with the supplied delimiter.
 *
 * @param ruleIdentifier forwarded unchanged to {@code joiningUnique}
 * @param category       forwarded unchanged to {@code joiningUnique}
 * @param entities       candidate entities; only those of the section with the greatest summed entity length are joined
 * @param delimiter      forwarded unchanged to {@code joiningUnique}
 */
public void joiningUniqueFromLongestSectionOnly(String ruleIdentifier, String category, Collection<Entity> entities, String delimiter) {

    joiningUnique(ruleIdentifier, category, findEntitiesFromLongestSection(entities), delimiter);
}
/**
 * Groups the entities by section number and returns the group whose entities have the greatest summed length,
 * as computed by {@code getTotalLengthOfEntities}.
 * <p>
 * The winner is found with a single pass ({@code max}) instead of sorting every group just to take the first one.
 * Which group wins when several share the same total is not defined by this method.
 *
 * @param entities entities to group
 * @return the entities of the section with the largest total length, or an empty list when {@code entities} is empty
 */
private static List<Entity> findEntitiesFromLongestSection(Collection<Entity> entities) {

    Map<Integer, List<Entity>> entitiesBySection = entities.stream().collect(Collectors.groupingBy(Entity::getSectionNumber));
    return entitiesBySection.entrySet()
            .stream()
            .max(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities))
            .map(Map.Entry::getValue)
            .orElseGet(Collections::emptyList);
}
/**
 * Sums {@code getLength()} over all entities stored as the value of the given map entry.
 *
 * @param entry a section-number-to-entities entry
 * @return the summed length of the entry's entities; 0 for an empty list
 */
private static int getTotalLengthOfEntities(Map.Entry<Integer, List<Entity>> entry) {

    int totalLength = 0;
    for (Entity entity : entry.getValue()) {
        totalLength += entity.getLength();
    }
    return totalLength;
}
/**
 * Restricts the given entities to those selected by {@code findEntitiesFromFirstSection} and hands them to
 * {@code joiningUnique} with the supplied delimiter.
 *
 * @param ruleIdentifier forwarded unchanged to {@code joiningUnique}
 * @param category       forwarded unchanged to {@code joiningUnique}
 * @param entities       candidate entities; only those of the lowest section number are joined
 * @param delimiter      forwarded unchanged to {@code joiningUnique}
 */
public void joiningUniqueFromFirstSectionOnly(String ruleIdentifier, String category, Collection<Entity> entities, String delimiter) {

    joiningUnique(ruleIdentifier, category, findEntitiesFromFirstSection(entities), delimiter);
}
/**
 * Convenience overload of the four-argument {@code joiningUnique} that uses ", " as the delimiter.
 * <p>
 * Only the unique variant is invoked; additionally calling {@code joining} here would run the non-unique
 * joining for the same rule and category as well.
 *
 * @param ruleIdentifier forwarded unchanged to the four-argument overload
 * @param category       forwarded unchanged to the four-argument overload
 * @param entities       forwarded unchanged to the four-argument overload
 */
public void joiningUnique(String ruleIdentifier, String category, Collection<Entity> entities) {

    joiningUnique(ruleIdentifier, category, entities, ", ");
}
@ -63,6 +142,23 @@ public class ComponentCreationService {
}
/**
 * Splits the value of every given entity into sentences and calls {@code create} once per sentence.
 * <p>
 * Sentence boundaries come from an English-locale {@link BreakIterator}. Line feeds are removed from each
 * sentence and surrounding whitespace is trimmed before the sentence is passed to {@code create} together
 * with the transformation text "Split into sentences" and the originating entity.
 *
 * @param ruleIdentifier forwarded unchanged to {@code create}
 * @param category       forwarded unchanged to {@code create}
 * @param entities       entities whose values are split; an empty collection results in no calls
 */
public void asSentences(String ruleIdentifier, String category, Collection<Entity> entities) {

    // A BreakIterator is re-targeted via setText, so a single instance serves every entity.
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    for (Entity entity : entities) {
        String text = entity.getValue();
        sentenceIterator.setText(text);
        int start = sentenceIterator.first();
        for (int end = sentenceIterator.next(); end != BreakIterator.DONE; start = end, end = sentenceIterator.next()) {
            // Literal replace yields the same result as replaceAll("\\n", "") without compiling a regex per sentence.
            String sentence = text.substring(start, end).replace("\n", "").trim();
            create(ruleIdentifier, category, sentence, "Split into sentences", entity);
        }
    }
}
public void convertDates(String ruleIdentifier, String category, Collection<Entity> entities) {
convertDates(ruleIdentifier, category, entities, "dd/MM/yyyy");
@ -92,8 +188,7 @@ public class ComponentCreationService {
kieSession.insert(Component.builder()
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.category(category)
.value(value)
.transformation(transformation).references(new LinkedList<>(references)).build());
.value(value).transformation(transformation).references(new LinkedList<>(references)).build());
}

View File

@ -49,7 +49,13 @@ public class ManualEntityCreationService {
/**
 * Converts every entry of the redaction log into a {@code ManualEntity}, re-asserts its applied/skipped state
 * and delegates to the list-based {@code toRedactionEntity} overload.
 * <p>
 * For each converted entity: when {@code isApplied()} is true, {@code apply} is called with the entity's own
 * rule identifier, reason and legal basis; otherwise {@code skip} is called with its rule identifier and reason.
 *
 * @param redactionLog source of the redaction log entries
 * @param node         forwarded unchanged to the list-based overload
 * @return the result of the list-based {@code toRedactionEntity} overload
 */
public List<ManualEntity> toRedactionEntity(RedactionLog redactionLog, SemanticNode node) {

    List<ManualEntity> manualEntities = redactionLog.getRedactionLogEntry().stream().map(ManualEntity::fromRedactionLogEntry).toList();
    // Explicit loop instead of Stream.peek: the state update is required behaviour, not a debugging hook.
    for (ManualEntity manualEntity : manualEntities) {
        if (manualEntity.isApplied()) {
            manualEntity.apply(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis());
        } else {
            manualEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason());
        }
    }
    return toRedactionEntity(manualEntities, node);
}
@ -59,6 +65,7 @@ public class ManualEntityCreationService {
List<ManualEntity> manualEntities = manualRedactionEntries.stream()
.filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary()))
.map(ManualEntity::fromManualRedactionEntry)
.peek(manualEntity -> manualEntity.apply("MAN.5.0", "manual entries are applied by default", manualEntity.getLegalBasis()))
.toList();
return toRedactionEntity(manualEntities, node);
@ -97,11 +104,7 @@ public class ManualEntityCreationService {
TextEntity correctEntity = entityCreationService.forceByTextRange(closestTextRange, manualEntity.getType(), manualEntity.getEntityType(), node);
if (manualEntity.isApplied()) {
correctEntity.apply(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis());
} else {
correctEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason());
}
correctEntity.addMatchedRules(manualEntity.getMatchedRuleList());
correctEntity.setDictionaryEntry(manualEntity.isDictionaryEntry());
correctEntity.setDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry());
correctEntity.getManualOverwrite().addChanges(manualEntity.getManualOverwrite().getManualChangeLog());

View File

@ -301,9 +301,8 @@ rule "DOC.4.1: study title on cover page between sections"
)
then
List<String> startStrings = List.of("Study Title", "Study Title:", "Title", "Final Report", "Final Study Report", "Report 92 50 12 136");
List<String> stopStrings = List.of("Guideline", "Guidelines", "Study Identification", "Data Requirement", "Submitted", "Test Guideline", "Study Director", "Author", "Including:", "Laboratory Investigations", "Test Article", "HLS", "Official Journal");
List<String> stopStrings = List.of("Guideline", "Guidelines", "Study Identification", "Data Requirement", "Submitted", "Test Guideline",
"Study Director", "Author", "Including:", "Laboratory Investigations", "Test Article", "HLS", "Official Journal");
// too many false positives due to term in header and cover page stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Final Report", $page.getMainBodyTextBlock()));
entityCreationService.shortestBetweenAnyString(startStrings, stopStrings, "title", EntityType.ENTITY, document).forEach(entity -> {

View File

@ -203,6 +203,14 @@ rule "DefaultComponents.8.0: Certificate of analysis batch identification"
componentCreationService.joiningUnique("DefaultComponents.8.0", "Batch_Number", $batchNumbers);
end
// For files whose "oecd_number" attribute is 425 or 430: collects all "study_conclusion" entities and creates a
// "Study_Conclusion" component from the unique values found in the lowest-numbered section containing them.
rule "StudyConclusion.0.0: Study conclusion in first found section"
    when
        FileAttribute(label == "oecd_number", value == "425" || value == "430")
        $studyConclusions: List() from collect(Entity(type == "study_conclusion"))
    then
        // The identifier passed here matches this rule's name, the same way DefaultComponents.8.0 passes its own name.
        componentCreationService.joiningUniqueFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $studyConclusions);
end
rule "DefaultComponents.999.0: Create components for all unmapped entities."
salience -999