Pull request #494: RSS-145: Added rules to add new FileAttributes

Merge in RED/redaction-service from RSS-145 to master

* commit 'e2234dc52a8a27a285c0e20b8326bd7129fc7dbf':
  RSS-145: Fixed Immutable list exception when merging existing and added fileattributes
  RSS-145: Added rules to add new FileAttributes
This commit is contained in:
Dominique Eiflaender 2022-10-28 13:07:20 +02:00
commit 18487c639b
8 changed files with 148 additions and 18 deletions

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
import java.util.Set;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import lombok.AllArgsConstructor;
@ -32,5 +34,7 @@ public class AnalyzeResult {
private ManualRedactions manualRedactions;
private Set<FileAttribute> addedFileAttributes;
}

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.Set;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Bundles the two outputs of an entity search run: the entities that were found and the
 * file attributes that were newly added while the search executed.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class FindEntitiesResult {
// Entities found by the search.
private Set<Entity> entities;
// File attributes that were added during the search, in addition to those already known.
private Set<FileAttribute> addedFileAttributes;
}

View File

@ -1,10 +1,13 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -19,4 +22,7 @@ public class PageEntities {
@Builder.Default
private Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
@Builder.Default
private Set<FileAttribute> addedFileAttributes = new HashSet<>();
}

View File

@ -74,6 +74,9 @@ public class Section {
@Builder.Default
private List<FileAttribute> fileAttributes = new ArrayList<>();
@Builder.Default
private Set<FileAttribute> addedFileAttributes = new HashSet<>();
@Builder.Default
private List<SectionArea> sectionAreas = new ArrayList<>();
@ -84,6 +87,7 @@ public class Section {
/**
 * Rule action that forwards to {@code redactOrRecommendAiEntities} with the given
 * {@code type} and {@code asType} and the fixed remaining arguments
 * {@code false, 0, null, null}.
 *
 * @param type   first argument passed through to {@code redactOrRecommendAiEntities}
 *               (presumably the AI/NER entity type to look up; confirm against the delegate)
 * @param asType second argument passed through to {@code redactOrRecommendAiEntities}
 *               (presumably the type the hits are reported as; confirm against the delegate)
 */
@SuppressWarnings("unused")
@ThenAction
public void addAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) {
redactOrRecommendAiEntities(type, asType, false, 0, null, null);
}
@ -91,9 +95,11 @@ public class Section {
/**
 * Rule action that forwards to {@code redactOrRecommendAiEntities} with the given
 * {@code type} and {@code asType} and the fixed remaining arguments
 * {@code false, 0, null, null}.
 * <p>
 * NOTE(review): this call is argument-for-argument identical to the one made by
 * {@code addAiEntities}, so the two actions currently behave the same. Confirm that
 * this is intended.
 *
 * @param type   first argument passed through to {@code redactOrRecommendAiEntities}
 * @param asType second argument passed through to {@code redactOrRecommendAiEntities}
 */
@SuppressWarnings("unused")
@ThenAction
public void recommendAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) {
redactOrRecommendAiEntities(type, asType, false, 0, null, null);
}
@SuppressWarnings("unused")
@ThenAction
public void redactAiEntities(@Argument(ArgumentType.TYPE) String type,
@ -224,6 +230,52 @@ public class Section {
}
/**
 * Rule action that adds a file attribute with the given label and value to this section,
 * unless an attribute with the same label and value is already present.
 * <p>
 * A new attribute is appended to {@code fileAttributes} and also recorded in
 * {@code addedFileAttributes}, so callers can tell which attributes were produced by rules.
 * Label and value are compared null-safely, so existing attributes with a missing label or
 * value do not cause a {@link NullPointerException}.
 *
 * @param label label of the file attribute to add
 * @param value value of the file attribute to add
 */
@SuppressWarnings("unused")
@ThenAction
public void addFileAttribute(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {

    // Either collection may have been explicitly set to null (e.g. via builder or setter).
    if (fileAttributes == null) {
        fileAttributes = new ArrayList<>();
    }
    if (addedFileAttributes == null) {
        addedFileAttributes = new HashSet<>();
    }

    boolean exists = fileAttributes.stream()
            .anyMatch(f -> java.util.Objects.equals(f.getLabel(), label) && java.util.Objects.equals(f.getValue(), value));
    if (exists) {
        return;
    }

    // Two separate instances are kept on purpose so the collections never alias one mutable object.
    fileAttributes.add(FileAttribute.builder().label(label).value(value).build());
    addedFileAttributes.add(FileAttribute.builder().label(label).value(value).build());
}
/**
 * Rule action that scans this section's {@code searchText} with a regular expression and
 * adds every non-blank match as a file attribute under the given label.
 * <p>
 * Each match is handed to {@link #addFileAttribute(String, String)}, which skips
 * label/value pairs that already exist and records new ones in both
 * {@code fileAttributes} and {@code addedFileAttributes}.
 *
 * @param label                  label of the file attribute(s) to add
 * @param pattern                regular expression, compiled via {@code Patterns.getCompiledPattern}
 * @param patternCaseInsensitive passed to {@code Patterns.getCompiledPattern} together with the pattern
 * @param group                  capturing group whose text becomes the attribute value;
 *                               {@code 0} selects the whole match
 * @throws IndexOutOfBoundsException if {@code group} does not exist in the pattern
 */
@SuppressWarnings("unused")
@ThenAction
public void addFileAttribute(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
                             @Argument(ArgumentType.REGEX) String pattern,
                             @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
                             @Argument(ArgumentType.INTEGER) int group) {

    Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
    Matcher matcher = compiledPattern.matcher(searchText);

    while (matcher.find()) {
        // group(...) yields null when the group did not take part in the match; isNotBlank filters that out too.
        String match = matcher.group(group);
        if (StringUtils.isNotBlank(match)) {
            // Single place for the "add only if not yet present" logic.
            addFileAttribute(label, match);
        }
    }
}
@SuppressWarnings("unused")
@WhenCondition
public boolean hasTableHeader(@Argument(ArgumentType.STRING) String headerName) {
@ -1390,9 +1442,9 @@ public class Section {
Set<Entity> entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet());
List<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toList());
Set<Entity> found = EntitySearchUtils.findEntities(searchText,
new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)),
dictionary.getType(asType),
new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)),
dictionary.getType(asType),
new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
found = found.stream().filter(f -> !f.isFalsePositive()).collect(Collectors.toSet());

View File

@ -26,6 +26,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.do
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
@ -159,7 +160,7 @@ public class AnalyzeService {
analyzeRequest);
if (sectionsToReanalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, new HashSet<>());
}
NerEntities nerEntities;
@ -191,7 +192,7 @@ public class AnalyzeService {
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE));
redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, pageEntities.getAddedFileAttributes());
}
@ -233,7 +234,7 @@ public class AnalyzeService {
true);
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false, pageEntities.getAddedFileAttributes());
}
@ -271,12 +272,14 @@ public class AnalyzeService {
}
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest,
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
long startTime,
RedactionLog redactionLog,
Text text,
DictionaryVersion dictionaryVersion,
boolean isReanalysis) {
boolean isReanalysis,
Set<FileAttribute> addedFileAttributes
) {
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
@ -305,6 +308,7 @@ public class AnalyzeService {
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
.wasReanalyzed(isReanalysis)
.manualRedactions(analyzeRequest.getManualRedactions())
.addedFileAttributes(addedFileAttributes)
.build();
}

View File

@ -6,6 +6,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.entity
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
@ -40,23 +41,32 @@ public class EntityRedactionService {
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
FindEntitiesResult findEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
if (dictionary.hasLocalEntries()) {
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(entities, dictionary);
Set<Entity> foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
if (dictionary.hasLocalEntries() || !findEntitiesResult.getAddedFileAttributes().isEmpty()) {
if(!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
//AnalyzeRequest provides immutable list.
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
mergedFileAttributes.addAll(analyzeRequest.getFileAttributes());
mergedFileAttributes.addAll(findEntitiesResult.getAddedFileAttributes());
analyzeRequest.setFileAttributes(mergedFileAttributes);
}
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(findEntitiesResult.getEntities(), dictionary);
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
EntitySearchUtils.addEntitiesWithHigherRank(findEntitiesResult.getEntities(), foundByLocalEntitiesResult.getEntities(), dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(findEntitiesResult.getEntities());
}
Map<Integer, List<Entity>> entitiesPerPage = convertToEntitiesPerPage(entities);
Map<Integer, List<Entity>> entitiesPerPage = convertToEntitiesPerPage(findEntitiesResult.getEntities());
EntitySearchUtils.removeEntitiesContainedInRedactedLogos(imagesPerPage, entitiesPerPage);
return new PageEntities(entitiesPerPage, imagesPerPage);
return new PageEntities(entitiesPerPage, imagesPerPage, findEntitiesResult.getAddedFileAttributes());
}
public Set<Entity> findEntities(List<SectionText> reanalysisSections,
public FindEntitiesResult findEntities(List<SectionText> reanalysisSections,
Dictionary dictionary,
KieContainer kieContainer,
AnalyzeRequest analyzeRequest,
@ -147,9 +157,23 @@ public class EntityRedactionService {
}
Set<FileAttribute> addedFileAttributes = new HashSet<>();
Set<Entity> entities = new HashSet<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
if (!addedFileAttributes.isEmpty()) {
    // Section.Builder provides an immutable list, so merge into a fresh mutable one.
    // Start from the section's EXISTING file attributes (not its added ones); otherwise
    // setFileAttributes below would drop everything the section already carried.
    List<FileAttribute> mergedFileAttributes = new ArrayList<>();
    if (sectionSearchableTextPair.getSection().getFileAttributes() != null) {
        mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getFileAttributes());
    }
    // Then append the attributes that rules added while analysing earlier sections.
    mergedFileAttributes.addAll(addedFileAttributes);
    sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
}
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
var entriesWithoutSurroundingText = analysedSection.getEntities()
@ -177,7 +201,7 @@ public class EntityRedactionService {
});
return entities;
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
}

View File

@ -384,6 +384,19 @@ public class RedactionIntegrationTest {
}
/**
 * Analyses the sample study document and checks that exactly one file attribute was added
 * by the rules: label "OECD Number" with value "OECD 425".
 */
@Test
public void testAddFileAttribute() {

    AnalyzeRequest request = prepareStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
    analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));

    AnalyzeResult result = analyzeService.analyze(request);

    // Assert on the collection itself so a failure prints its contents instead of just a number.
    assertThat(result.getAddedFileAttributes()).hasSize(1);
    assertThat(result.getAddedFileAttributes()).contains(FileAttribute.builder().label("OECD Number").value("OECD 425").build());
}
@Test
@Ignore
@SneakyThrows

View File

@ -390,4 +390,11 @@ rule "101: Redact CAS numbers"
Section(hasTableHeader("Sample #"))
then
section.redactCell("Sample #", 8, "PII", true, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
// Derives "OECD Number" file attributes from sections that list test guidelines.
// Fires when the section text contains one of the headings "DATA REQUIREMENT(S):" or
// "TEST GUIDELINE(S):" and also mentions "OECD", "EPA" or "OPPTS".
// NOTE(review): EPA and OPPTS mentions trigger the rule, but the pattern below only matches
// OECD numbers. Confirm that is intended.
rule "102: Guidelines FileAttributes"
when
Section((text.contains("DATA REQUIREMENT(S):") || text.contains("TEST GUIDELINE(S):")) && (text.contains("OECD") || text.contains("EPA") || text.contains("OPPTS")))
then
// Pattern: "OECD ", optional "No " / "No. ", three digits, optional " (yyyy)".
// false = pattern is not case-insensitive; 0 = use the whole match as the attribute value.
section.addFileAttribute("OECD Number", "OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", false, 0);
end