diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java index feb49d18..661351a0 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java @@ -1,5 +1,7 @@ package com.iqser.red.service.redaction.v1.model; +import java.util.Set; + import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions; import lombok.AllArgsConstructor; @@ -32,5 +34,7 @@ public class AnalyzeResult { private ManualRedactions manualRedactions; + private Set addedFileAttributes; + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/FindEntitiesResult.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/FindEntitiesResult.java new file mode 100644 index 00000000..bc0dba41 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/FindEntitiesResult.java @@ -0,0 +1,20 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.util.Set; + +import com.iqser.red.service.redaction.v1.model.FileAttribute; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class FindEntitiesResult { + + private Set entities; + private Set addedFileAttributes; +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PageEntities.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PageEntities.java index 6bdc6057..f3394629 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PageEntities.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PageEntities.java @@ -1,10 +1,13 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import com.iqser.red.service.redaction.v1.model.FileAttribute; + import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -19,4 +22,7 @@ public class PageEntities { @Builder.Default private Map> imagesPerPage = new HashMap<>(); + @Builder.Default + private Set addedFileAttributes = new HashSet<>(); + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 0a84f380..62f456c9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -74,6 +74,9 @@ public class Section { @Builder.Default private List fileAttributes = new ArrayList<>(); + @Builder.Default + private Set addedFileAttributes = new HashSet<>(); + @Builder.Default private List sectionAreas = new ArrayList<>(); @@ -84,6 +87,7 @@ public class Section { @SuppressWarnings("unused") @ThenAction public void addAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) { + redactOrRecommendAiEntities(type, asType, false, 0, null, null); } @@ -91,9 +95,11 @@ public class Section { @SuppressWarnings("unused") @ThenAction public void recommendAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) { + redactOrRecommendAiEntities(type, asType, false, 0, null, null); } + @SuppressWarnings("unused") @ThenAction public void redactAiEntities(@Argument(ArgumentType.TYPE) String type, @@ -224,6 +230,52 @@ public class Section { } + @SuppressWarnings("unused") + @ThenAction + public void addFileAttribute(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) { + + if (fileAttributes == null) { + fileAttributes = new ArrayList<>(); + } + + boolean exists = fileAttributes.stream().anyMatch(f -> f.getLabel().equals(label) && f.getValue().equals(value)); + + if (!exists) { + fileAttributes.add(FileAttribute.builder().label(label).value(value).build()); + addedFileAttributes.add(FileAttribute.builder().label(label).value(value).build()); + } + } + + + @SuppressWarnings("unused") + @ThenAction + public void addFileAttribute(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, + @Argument(ArgumentType.REGEX) String pattern, + @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, + @Argument(ArgumentType.INTEGER) int group) { + + if (fileAttributes == null) { + fileAttributes = new ArrayList<>(); + } + + Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive); + + Matcher matcher = compiledPattern.matcher(searchText); + + while (matcher.find()) { + String match = matcher.group(group); + if (StringUtils.isNotBlank(match)) { + boolean exists = fileAttributes.stream().anyMatch(f -> f.getLabel().equals(label) && f.getValue().equals(match)); + + if (!exists) { + fileAttributes.add(FileAttribute.builder().label(label).value(match).build()); + addedFileAttributes.add(FileAttribute.builder().label(label).value(match).build()); + } + } + } + } + + @SuppressWarnings("unused") @WhenCondition public boolean hasTableHeader(@Argument(ArgumentType.STRING) String headerName) { @@ -1390,9 +1442,9 @@ public class Section { Set entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet()); List values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toList()); Set found = EntitySearchUtils.findEntities(searchText, - new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)), - dictionary.getType(asType), - new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION)); + new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)), + dictionary.getType(asType), + new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION)); EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions); found = found.stream().filter(f -> !f.isFalsePositive()).collect(Collectors.toSet()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index 90ee35d6..f8cfe19a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -26,6 +26,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.do import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis; import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; import com.iqser.red.service.redaction.v1.model.AnalyzeResult; +import com.iqser.red.service.redaction.v1.model.FileAttribute; import com.iqser.red.service.redaction.v1.model.Rectangle; import com.iqser.red.service.redaction.v1.model.RedactionLog; import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; @@ -159,7 +160,7 @@ public class AnalyzeService { analyzeRequest); if (sectionsToReanalyse.isEmpty()) { - return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true); + return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, new HashSet<>()); } NerEntities nerEntities; @@ -191,7 +192,7 @@ public class AnalyzeService { redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE)); redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries); - return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true); + return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, pageEntities.getAddedFileAttributes()); } @@ -233,7 +234,7 @@ public class AnalyzeService { true); redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries); - return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false); + return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false, pageEntities.getAddedFileAttributes()); } @@ -271,12 +272,14 @@ public class AnalyzeService { } - private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, + private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest, long startTime, RedactionLog redactionLog, Text text, DictionaryVersion dictionaryVersion, - boolean isReanalysis) { + boolean isReanalysis, + Set addedFileAttributes + ) { redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion()); redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion()); @@ -305,6 +308,7 @@ public class AnalyzeService { .dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion()) .wasReanalyzed(isReanalysis) .manualRedactions(analyzeRequest.getManualRedactions()) + .addedFileAttributes(addedFileAttributes) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 8296dfe1..b68c3916 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -6,6 +6,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.entity import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization; import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; import com.iqser.red.service.redaction.v1.model.Engine; +import com.iqser.red.service.redaction.v1.model.FileAttribute; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; @@ -40,23 +41,32 @@ public class EntityRedactionService { public PageEntities findEntities(Dictionary dictionary, List sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) { Map> imagesPerPage = new HashMap<>(); - Set entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities); + FindEntitiesResult findEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities); - if (dictionary.hasLocalEntries()) { - Map> hintsPerSectionNumber = getHintsPerSection(entities, dictionary); - Set foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities); - EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary); - EntitySearchUtils.removeEntitiesContainedInLarger(entities); + if (dictionary.hasLocalEntries() || !findEntitiesResult.getAddedFileAttributes().isEmpty()) { + + if(!findEntitiesResult.getAddedFileAttributes().isEmpty()) { + //AnalyzeRequest provides immutable list. + List mergedFileAttributes = new ArrayList<>(); + mergedFileAttributes.addAll(analyzeRequest.getFileAttributes()); + mergedFileAttributes.addAll(findEntitiesResult.getAddedFileAttributes()); + analyzeRequest.setFileAttributes(mergedFileAttributes); + } + + Map> hintsPerSectionNumber = getHintsPerSection(findEntitiesResult.getEntities(), dictionary); + FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities); + EntitySearchUtils.addEntitiesWithHigherRank(findEntitiesResult.getEntities(), foundByLocalEntitiesResult.getEntities(), dictionary); + EntitySearchUtils.removeEntitiesContainedInLarger(findEntitiesResult.getEntities()); } - Map> entitiesPerPage = convertToEntitiesPerPage(entities); + Map> entitiesPerPage = convertToEntitiesPerPage(findEntitiesResult.getEntities()); EntitySearchUtils.removeEntitiesContainedInRedactedLogos(imagesPerPage, entitiesPerPage); - return new PageEntities(entitiesPerPage, imagesPerPage); + return new PageEntities(entitiesPerPage, imagesPerPage, findEntitiesResult.getAddedFileAttributes()); } - public Set findEntities(List reanalysisSections, + public FindEntitiesResult findEntities(List reanalysisSections, Dictionary dictionary, KieContainer kieContainer, AnalyzeRequest analyzeRequest, @@ -147,9 +157,23 @@ public class EntityRedactionService { } + + Set addedFileAttributes = new HashSet<>(); Set entities = new HashSet<>(); sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> { + + if(!addedFileAttributes.isEmpty()) { + //Section.Builder provides immutable list. + List mergedFileAttributes = new ArrayList<>(); + mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes()); + mergedFileAttributes.addAll(addedFileAttributes); + sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes); + } + Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection()); + + addedFileAttributes.addAll(analysedSection.getAddedFileAttributes()); + EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities()); var entriesWithoutSurroundingText = analysedSection.getEntities() @@ -177,7 +201,7 @@ public class EntityRedactionService { }); - return entities; + return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 720b148d..38360d8b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -384,6 +384,19 @@ public class RedactionIntegrationTest { } + @Test + public void testAddFileAttribute() { + + AnalyzeRequest request = prepareStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf"); + + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + AnalyzeResult result = analyzeService.analyze(request); + + assertThat(result.getAddedFileAttributes().size()).isEqualTo(1); + assertThat(result.getAddedFileAttributes()).contains(FileAttribute.builder().label("OECD Number").value("OECD 425").build()); + } + + @Test @Ignore @SneakyThrows diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index d9d1622d..2b220fbb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -390,4 +390,11 @@ rule "101: Redact CAS numbers" Section(hasTableHeader("Sample #")) then section.redactCell("Sample #", 8, "PII", true, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "102: Guidelines FileAttributes" + when + Section((text.contains("DATA REQUIREMENT(S):") || text.contains("TEST GUIDELINE(S):")) && (text.contains("OECD") || text.contains("EPA") || text.contains("OPPTS"))) + then + section.addFileAttribute("OECD Number", "OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", false, 0); end \ No newline at end of file