diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java index f935b9b9..93c7e33c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java @@ -2,23 +2,22 @@ package com.iqser.red.service.redaction.v1.server.model.dictionary; import static java.lang.String.format; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException; import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.utils.Patterns; +import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException; import lombok.Data; import lombok.Getter; @@ -108,10 +107,14 @@ public class Dictionary { throw new IllegalArgumentException(format("%s is not a valid dictionary entry", value)); } Set matchedRulesSet = new HashSet<>(matchedRules); - localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(value.trim(), matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet())); + localAccessMap.get(type) + .getLocalEntriesWithMatchedRules() + .merge(value.trim(), 
matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet())); if (alsoAddLastname) { String lastname = value.split(" ")[0]; - localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet())); + localAccessMap.get(type) + .getLocalEntriesWithMatchedRules() + .merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet())); } } @@ -130,16 +133,20 @@ public class Dictionary { public void addMultipleAuthorsAsRecommendation(TextEntity textEntity) { - String cleanedWord = textEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; - Pattern pattern = Patterns.AUTHOR_TABLE_SPLITTER; - Matcher matcher = pattern.matcher(cleanedWord); + splitIntoAuthorNames(textEntity).forEach(authorName -> addLocalDictionaryEntry(textEntity.getType(), authorName, textEntity.getMatchedRuleList(), true)); - while (matcher.find()) { - String match = matcher.group().trim(); - if (match.length() >= 3) { - addLocalDictionaryEntry(textEntity.getType(), match, textEntity.getMatchedRuleList(), true); - } + } + + + public static List splitIntoAuthorNames(TextEntity textEntity) { + + List splitAuthorNames; + if (textEntity.getValue().contains(",")) { + splitAuthorNames = Arrays.asList(textEntity.getValue().split(",")); + } else { + splitAuthorNames = Arrays.asList(textEntity.getValueWithLineBreaks().split("\n")); } + return splitAuthorNames.stream().map(String::trim).filter(authorName -> Patterns.AUTHOR_NAME_PATTERN.matcher(authorName).matches()).toList(); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/Patterns.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/Patterns.java index 8d37ef67..fb88d024 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/Patterns.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/Patterns.java @@ -11,8 +11,7 @@ public final class Patterns { public static final Map patternCache = new HashMap<>(); - public static final Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile( - "(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.){1,3})|(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} ){1,3})"); + public static final Pattern AUTHOR_NAME_PATTERN = Pattern.compile("^(?!\\w*[.]$)\\p{L}+[.\\p{L}\\s]*$"); public Pattern getCompiledPattern(String pattern, boolean caseInsensitive) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java index 01d8ed22..8e0cfddc 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.when; @@ -103,6 +104,25 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { } + @Test + public void authorNameSplittingTest() { + + AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/tableWithAuthors.pdf"); + + analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); + AnalyzeResult result = analyzeService.analyze(request); + + var entityLog 
= redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + var recommendations = entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getEntryType().equals(EntryType.RECOMMENDATION)) + .map(EntityLogEntry::getValue) + .toList(); + + assertThat(recommendations).containsExactlyInAnyOrder("Michael N.", "Funnarie B.", "Feuer A."); + } + @Test public void acceptanceTests() throws IOException { @@ -148,7 +168,8 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { return redactionLog.getEntityLogEntry() .stream() .filter(entry -> entry.getType().equals(type)) - .filter(entry -> entry.getValue().equals(value)).filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0))); + .filter(entry -> entry.getValue().equals(value)) + .filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0))); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index eb4f3a48..861cc64b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -266,9 +266,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { @Test public void importedRedactionsTest() throws IOException { - ClassPathResource importedRedactionClasspathResource = new ClassPathResource("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json"); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, 
FileType.IMPORTED_REDACTIONS), importedRedactionClasspathResource.getInputStream()); - + ClassPathResource importedRedactionClasspathResource = new ClassPathResource( + "files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json"); + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), + importedRedactionClasspathResource.getInputStream()); AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf"); System.out.println("Start Full integration test"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java index 2a865b1b..d32198d6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java @@ -11,81 +11,21 @@ import java.util.List; import java.util.Set; import java.util.stream.Collectors; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.kie.api.KieServices; -import org.kie.api.builder.KieBuilder; -import org.kie.api.builder.KieFileSystem; -import org.kie.api.builder.KieModule; -import org.kie.api.runtime.KieContainer; -import org.kie.api.runtime.KieSession; -import org.kie.internal.io.ResourceFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.context.annotation.Bean; 
-import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; -import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; - -@Import(ManualChangesIntegrationTest.TestConfiguration.class) -public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { - - private static final String RULES = "drools/manual_redaction_rules.drl"; - - @Autowired - private EntityEnrichmentService entityEnrichmentService; - private EntityCreationService entityCreationService; - private KieSession kieSession; - - @Qualifier("kieContainer") - @Autowired - private KieContainer kieContainer; - - @Configuration - @Import(BuildDocumentIntegrationTest.TestConfiguration.class) - public static class 
TestConfiguration { - - @Bean - public KieContainer kieContainer() { - - KieServices kieServices = KieServices.Factory.get(); - - KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); - kieFileSystem.write(ResourceFactory.newClassPathResource(RULES)); - KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); - kieBuilder.buildAll(); - KieModule kieModule = kieBuilder.getKieModule(); - - return kieServices.newKieContainer(kieModule.getReleaseId()); - } - - } - - - @BeforeEach - public void createServices() { - - kieSession = kieContainer.newKieSession(); - entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession); - ManualChangesApplicationService manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService); - kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); - kieSession.setGlobal("entityCreationService", entityCreationService); - } +import com.iqser.red.service.redaction.v1.server.rules.RulesIntegrationTest; +public class ManualChangesIntegrationTest extends RulesIntegrationTest { @Test public void manualResizeRedactionTest() { @@ -108,11 +48,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { .updateDictionary(false) .build(); - kieSession.insert(document); - document.streamAllSubNodes().forEach(kieSession::insert); - kieSession.insert(manualResizeRedaction); - kieSession.fireAllRules(); - kieSession.dispose(); + doAnalysis(document, List.of(manualResizeRedaction)); assertEquals(biggerEntity.getTextRange(), entity.getTextRange()); assertEquals(biggerEntity.getDeepestFullyContainingNode(), entity.getDeepestFullyContainingNode()); @@ -141,11 +77,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { .requestDate(OffsetDateTime.now()) .build(); - kieSession.insert(manualForceRedaction); - kieSession.insert(document); - document.streamAllSubNodes().forEach(kieSession::insert); - 
kieSession.fireAllRules(); - kieSession.dispose(); + doAnalysis(document, List.of(manualForceRedaction)); assertEquals(Paragraph.class, entity.getDeepestFullyContainingNode().getClass()); assertFalse(entity.getIntersectingNodes().isEmpty()); @@ -170,11 +102,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build(); - kieSession.insert(document); - document.streamAllSubNodes().forEach(kieSession::insert); - kieSession.insert(idRemoval); - kieSession.fireAllRules(); - kieSession.dispose(); + doAnalysis(document, List.of(idRemoval)); assertEquals("David Ksenia", entity.getValue()); assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); @@ -199,12 +127,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { .requestDate(OffsetDateTime.now()) .build(); - kieSession.insert(document); - document.streamAllSubNodes().forEach(kieSession::insert); - kieSession.insert(idRemoval); - kieSession.insert(manualForceRedaction); - kieSession.fireAllRules(); - kieSession.dispose(); + /* Insert both manual changes: the pre-refactoring code inserted idRemoval before manualForceRedaction. */ doAnalysis(document, List.of(idRemoval, manualForceRedaction)); assertEquals(Paragraph.class, entity.getDeepestFullyContainingNode().getClass()); assertFalse(entity.getIntersectingNodes().isEmpty()); @@ -227,11 +150,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.REQUESTED).build(); - kieSession.insert(idRemoval); - kieSession.insert(document); - document.streamAllSubNodes().forEach(kieSession::insert); - kieSession.fireAllRules(); - kieSession.dispose(); + doAnalysis(document, List.of(idRemoval)); assertEquals(Paragraph.class,
entity.getDeepestFullyContainingNode().getClass()); assertFalse(entity.getIntersectingNodes().isEmpty()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java new file mode 100644 index 00000000..05e51b3a --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java @@ -0,0 +1,28 @@ +package com.iqser.red.service.redaction.v1.server.rules; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.junit.jupiter.api.Test; + +import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; + +public class Cbi11Test extends RulesIntegrationTest { + + @Test + public void multiAuthorNameSplittingTest() { + + Document document = buildGraph("files/Minimal Examples/multipleAuthorsInTable.pdf"); + + doAnalysis(document, Collections.emptyList()); + + List authorNames = document.getEntities().stream().map(Dictionary::splitIntoAuthorNames).flatMap(Collection::stream).toList(); + + assertThat(authorNames).containsExactlyInAnyOrder("Cargile", "N.L.", "Ross", "J.A.", "Egli", "Ramsteiner"); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/RulesIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/RulesIntegrationTest.java new file mode 100644 index 00000000..8ce45581 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/RulesIntegrationTest.java @@ -0,0 +1,80 
@@ +package com.iqser.red.service.redaction.v1.server.rules; + +import java.util.Collection; + +import org.junit.jupiter.api.BeforeEach; +import org.kie.api.KieServices; +import org.kie.api.builder.KieBuilder; +import org.kie.api.builder.KieFileSystem; +import org.kie.api.builder.KieModule; +import org.kie.api.runtime.KieContainer; +import org.kie.api.runtime.KieSession; +import org.kie.internal.io.ResourceFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; + +import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; + +public class RulesIntegrationTest extends BuildDocumentIntegrationTest { + + protected static final String RULES = "drools/rules.drl"; + + @Autowired + protected EntityEnrichmentService entityEnrichmentService; + protected EntityCreationService entityCreationService; + protected KieSession kieSession; + + @Qualifier("kieContainer") + @Autowired + private KieContainer kieContainer; + + + protected void doAnalysis(Document document, Collection objectToInsert) { + + kieSession.insert(document); + document.streamAllSubNodes().forEach(kieSession::insert); + objectToInsert.forEach(kieSession::insert); + kieSession.fireAllRules(); + kieSession.dispose(); + } + + + @Configuration + @Import(BuildDocumentIntegrationTest.TestConfiguration.class) + public static class TestConfiguration { + + @Bean + public KieContainer 
kieContainer() { + + KieServices kieServices = KieServices.Factory.get(); + + KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); + kieFileSystem.write(ResourceFactory.newClassPathResource(RULES)); + KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); + kieBuilder.buildAll(); + KieModule kieModule = kieBuilder.getKieModule(); + + return kieServices.newKieContainer(kieModule.getReleaseId()); + } + + } + + + @BeforeEach + public void createServices() { + + kieSession = kieContainer.newKieSession(); + entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession); + ManualChangesApplicationService manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService); + kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService); + kieSession.setGlobal("entityCreationService", entityCreationService); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/multipleAuthorsInTable.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/multipleAuthorsInTable.pdf new file mode 100644 index 00000000..e981e090 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/multipleAuthorsInTable.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/tableWithAuthors.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/tableWithAuthors.pdf new file mode 100644 index 00000000..fc75667e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/tableWithAuthors.pdf differ