Pull request #494: RSS-145: Added rules to add new FileAttributes
Merge in RED/redaction-service from RSS-145 to master * commit 'e2234dc52a8a27a285c0e20b8326bd7129fc7dbf': RSS-145: Fixed Immutable list exception when merging existing and added fileattributes RSS-145: Added rules to add new FileAttributes
This commit is contained in:
commit
18487c639b
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -32,5 +34,7 @@ public class AnalyzeResult {
|
||||
|
||||
private ManualRedactions manualRedactions;
|
||||
|
||||
private Set<FileAttribute> addedFileAttributes;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,20 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Value holder that pairs the entities found by an entity search with the file
 * attributes that were added while that search ran.
 * Lombok generates accessors, a builder and both a no-args and an all-args constructor.
 * NOTE(review): neither field has a default, so both are {@code null} unless set via
 * builder/constructor/setter — callers appear to always populate both; confirm.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class FindEntitiesResult {
|
||||
|
||||
/** Entities collected by the search. */
private Set<Entity> entities;
|
||||
/** File attributes newly added during the search (as opposed to attributes already present). */
private Set<FileAttribute> addedFileAttributes;
|
||||
}
|
||||
@ -1,10 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -19,4 +22,7 @@ public class PageEntities {
|
||||
@Builder.Default
|
||||
private Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
|
||||
@Builder.Default
|
||||
private Set<FileAttribute> addedFileAttributes = new HashSet<>();
|
||||
|
||||
}
|
||||
|
||||
@ -74,6 +74,9 @@ public class Section {
|
||||
@Builder.Default
|
||||
private List<FileAttribute> fileAttributes = new ArrayList<>();
|
||||
|
||||
@Builder.Default
|
||||
private Set<FileAttribute> addedFileAttributes = new HashSet<>();
|
||||
|
||||
@Builder.Default
|
||||
private List<SectionArea> sectionAreas = new ArrayList<>();
|
||||
|
||||
@ -84,6 +87,7 @@ public class Section {
|
||||
/**
 * Rule action ({@code @ThenAction}) that forwards to
 * {@code redactOrRecommendAiEntities(type, asType, false, 0, null, null)}.
 * NOTE(review): this forwards exactly the same arguments as recommendAiEntities — confirm
 * that both actions are meant to behave identically.
 *
 * @param type   forwarded as the first argument; presumably the AI/NER entity type to look up — verify against redactOrRecommendAiEntities
 * @param asType forwarded as the second argument; presumably the type the findings are reported as — verify against redactOrRecommendAiEntities
 */
@SuppressWarnings("unused")
|
||||
@ThenAction
|
||||
public void addAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
redactOrRecommendAiEntities(type, asType, false, 0, null, null);
|
||||
}
|
||||
|
||||
@ -91,9 +95,11 @@ public class Section {
|
||||
/**
 * Rule action ({@code @ThenAction}) that forwards to
 * {@code redactOrRecommendAiEntities(type, asType, false, 0, null, null)}.
 * NOTE(review): this forwards exactly the same arguments as addAiEntities — confirm
 * that both actions are meant to behave identically.
 *
 * @param type   forwarded as the first argument; presumably the AI/NER entity type to look up — verify against redactOrRecommendAiEntities
 * @param asType forwarded as the second argument; presumably the type the findings are reported as — verify against redactOrRecommendAiEntities
 */
@SuppressWarnings("unused")
|
||||
@ThenAction
|
||||
public void recommendAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
redactOrRecommendAiEntities(type, asType, false, 0, null, null);
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@ThenAction
|
||||
public void redactAiEntities(@Argument(ArgumentType.TYPE) String type,
|
||||
@ -224,6 +230,52 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@ThenAction
|
||||
public void addFileAttribute(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
if (fileAttributes == null) {
|
||||
fileAttributes = new ArrayList<>();
|
||||
}
|
||||
|
||||
boolean exists = fileAttributes.stream().anyMatch(f -> f.getLabel().equals(label) && f.getValue().equals(value));
|
||||
|
||||
if (!exists) {
|
||||
fileAttributes.add(FileAttribute.builder().label(label).value(value).build());
|
||||
addedFileAttributes.add(FileAttribute.builder().label(label).value(value).build());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@ThenAction
|
||||
public void addFileAttribute(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
|
||||
@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group) {
|
||||
|
||||
if (fileAttributes == null) {
|
||||
fileAttributes = new ArrayList<>();
|
||||
}
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
Matcher matcher = compiledPattern.matcher(searchText);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(group);
|
||||
if (StringUtils.isNotBlank(match)) {
|
||||
boolean exists = fileAttributes.stream().anyMatch(f -> f.getLabel().equals(label) && f.getValue().equals(match));
|
||||
|
||||
if (!exists) {
|
||||
fileAttributes.add(FileAttribute.builder().label(label).value(match).build());
|
||||
addedFileAttributes.add(FileAttribute.builder().label(label).value(match).build());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean hasTableHeader(@Argument(ArgumentType.STRING) String headerName) {
|
||||
@ -1390,9 +1442,9 @@ public class Section {
|
||||
Set<Entity> entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet());
|
||||
List<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toList());
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(searchText,
|
||||
new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)),
|
||||
dictionary.getType(asType),
|
||||
new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
|
||||
new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)),
|
||||
dictionary.getType(asType),
|
||||
new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
|
||||
EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
|
||||
found = found.stream().filter(f -> !f.isFalsePositive()).collect(Collectors.toSet());
|
||||
|
||||
|
||||
@ -26,6 +26,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.do
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
@ -159,7 +160,7 @@ public class AnalyzeService {
|
||||
analyzeRequest);
|
||||
|
||||
if (sectionsToReanalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, new HashSet<>());
|
||||
}
|
||||
|
||||
NerEntities nerEntities;
|
||||
@ -191,7 +192,7 @@ public class AnalyzeService {
|
||||
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE));
|
||||
redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries);
|
||||
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, pageEntities.getAddedFileAttributes());
|
||||
}
|
||||
|
||||
|
||||
@ -233,7 +234,7 @@ public class AnalyzeService {
|
||||
true);
|
||||
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
|
||||
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false, pageEntities.getAddedFileAttributes());
|
||||
}
|
||||
|
||||
|
||||
@ -271,12 +272,14 @@ public class AnalyzeService {
|
||||
}
|
||||
|
||||
|
||||
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest,
|
||||
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
|
||||
long startTime,
|
||||
RedactionLog redactionLog,
|
||||
Text text,
|
||||
DictionaryVersion dictionaryVersion,
|
||||
boolean isReanalysis) {
|
||||
boolean isReanalysis,
|
||||
Set<FileAttribute> addedFileAttributes
|
||||
) {
|
||||
|
||||
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
|
||||
redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
|
||||
@ -305,6 +308,7 @@ public class AnalyzeService {
|
||||
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
|
||||
.wasReanalyzed(isReanalysis)
|
||||
.manualRedactions(analyzeRequest.getManualRedactions())
|
||||
.addedFileAttributes(addedFileAttributes)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.entity
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
@ -40,23 +41,32 @@ public class EntityRedactionService {
|
||||
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
|
||||
|
||||
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
|
||||
FindEntitiesResult findEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(entities, dictionary);
|
||||
Set<Entity> foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
if (dictionary.hasLocalEntries() || !findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
|
||||
if(!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
//AnalyzeRequest provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(analyzeRequest.getFileAttributes());
|
||||
mergedFileAttributes.addAll(findEntitiesResult.getAddedFileAttributes());
|
||||
analyzeRequest.setFileAttributes(mergedFileAttributes);
|
||||
}
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(findEntitiesResult.getEntities(), dictionary);
|
||||
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(findEntitiesResult.getEntities(), foundByLocalEntitiesResult.getEntities(), dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(findEntitiesResult.getEntities());
|
||||
}
|
||||
|
||||
Map<Integer, List<Entity>> entitiesPerPage = convertToEntitiesPerPage(entities);
|
||||
Map<Integer, List<Entity>> entitiesPerPage = convertToEntitiesPerPage(findEntitiesResult.getEntities());
|
||||
EntitySearchUtils.removeEntitiesContainedInRedactedLogos(imagesPerPage, entitiesPerPage);
|
||||
|
||||
return new PageEntities(entitiesPerPage, imagesPerPage);
|
||||
return new PageEntities(entitiesPerPage, imagesPerPage, findEntitiesResult.getAddedFileAttributes());
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> findEntities(List<SectionText> reanalysisSections,
|
||||
public FindEntitiesResult findEntities(List<SectionText> reanalysisSections,
|
||||
Dictionary dictionary,
|
||||
KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
@ -147,9 +157,23 @@ public class EntityRedactionService {
|
||||
|
||||
}
|
||||
|
||||
|
||||
Set<FileAttribute> addedFileAttributes = new HashSet<>();
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
|
||||
if(!addedFileAttributes.isEmpty()) {
|
||||
//Section.Builder provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes());
|
||||
mergedFileAttributes.addAll(addedFileAttributes);
|
||||
sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
|
||||
}
|
||||
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
|
||||
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
|
||||
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
|
||||
var entriesWithoutSurroundingText = analysedSection.getEntities()
|
||||
@ -177,7 +201,7 @@ public class EntityRedactionService {
|
||||
|
||||
});
|
||||
|
||||
return entities;
|
||||
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -384,6 +384,19 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAddFileAttribute() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
assertThat(result.getAddedFileAttributes().size()).isEqualTo(1);
|
||||
assertThat(result.getAddedFileAttributes()).contains(FileAttribute.builder().label("OECD Number").value("OECD 425").build());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
@SneakyThrows
|
||||
|
||||
@ -390,4 +390,11 @@ rule "101: Redact CAS numbers"
|
||||
Section(hasTableHeader("Sample #"))
|
||||
then
|
||||
section.redactCell("Sample #", 8, "PII", true, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
|
||||
// Fires for sections whose text contains "DATA REQUIREMENT(S):" or "TEST GUIDELINE(S):"
// together with one of "OECD", "EPA" or "OPPTS".
// Adds an "OECD Number" file attribute for every case-sensitive match of the OECD pattern
// (whole match, group 0), e.g. "OECD 425" or "OECD No. 425 (2008)".
// Note: EPA/OPPTS only take part in the trigger condition; only OECD numbers are extracted.
rule "102: Guidelines FileAttributes"
|
||||
when
|
||||
Section((text.contains("DATA REQUIREMENT(S):") || text.contains("TEST GUIDELINE(S):")) && (text.contains("OECD") || text.contains("EPA") || text.contains("OPPTS")))
|
||||
then
|
||||
section.addFileAttribute("OECD Number", "OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", false, 0);
|
||||
end
|
||||
Loading…
x
Reference in New Issue
Block a user