Merge branch 'RED-7902' into 'master'
RED-7902: Fix author name splitting. Closes RED-7902. See merge request redactmanager/redaction-service!202
This commit is contained in:
commit
79e76d688e
@ -2,23 +2,22 @@ package com.iqser.red.service.redaction.v1.server.model.dictionary;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.Patterns;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
@ -108,10 +107,14 @@ public class Dictionary {
|
||||
throw new IllegalArgumentException(format("%s is not a valid dictionary entry", value));
|
||||
}
|
||||
Set<MatchedRule> matchedRulesSet = new HashSet<>(matchedRules);
|
||||
localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(value.trim(), matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
|
||||
localAccessMap.get(type)
|
||||
.getLocalEntriesWithMatchedRules()
|
||||
.merge(value.trim(), matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
|
||||
if (alsoAddLastname) {
|
||||
String lastname = value.split(" ")[0];
|
||||
localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
|
||||
localAccessMap.get(type)
|
||||
.getLocalEntriesWithMatchedRules()
|
||||
.merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -130,16 +133,20 @@ public class Dictionary {
|
||||
|
||||
public void addMultipleAuthorsAsRecommendation(TextEntity textEntity) {
|
||||
|
||||
String cleanedWord = textEntity.getValue().replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
|
||||
Pattern pattern = Patterns.AUTHOR_TABLE_SPLITTER;
|
||||
Matcher matcher = pattern.matcher(cleanedWord);
|
||||
splitIntoAuthorNames(textEntity).forEach(authorName -> addLocalDictionaryEntry(textEntity.getType(), authorName, textEntity.getMatchedRuleList(), true));
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group().trim();
|
||||
if (match.length() >= 3) {
|
||||
addLocalDictionaryEntry(textEntity.getType(), match, textEntity.getMatchedRuleList(), true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static List<String> splitIntoAuthorNames(TextEntity textEntity) {
|
||||
|
||||
List<String> splitAuthorNames;
|
||||
if (textEntity.getValue().contains(",")) {
|
||||
splitAuthorNames = Arrays.asList(textEntity.getValue().split(","));
|
||||
} else {
|
||||
splitAuthorNames = Arrays.asList(textEntity.getValueWithLineBreaks().split("\n"));
|
||||
}
|
||||
return splitAuthorNames.stream().map(String::trim).filter(authorName -> Patterns.AUTHOR_NAME_PATTERN.matcher(authorName).matches()).toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -11,8 +11,7 @@ public final class Patterns {
|
||||
|
||||
public static final Map<String, Pattern> patternCache = new HashMap<>();
|
||||
|
||||
public static final Pattern AUTHOR_TABLE_SPLITTER = Pattern.compile(
|
||||
"(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.){1,3})|(((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} ){1,3})");
|
||||
public static final Pattern AUTHOR_NAME_PATTERN = Pattern.compile("^(?!\\w*[.]$)\\p{L}+[.\\p{L}\\s]*$");
|
||||
|
||||
|
||||
public Pattern getCompiledPattern(String pattern, boolean caseInsensitive) {
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ -103,6 +104,25 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void authorNameSplittingTest() {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/tableWithAuthors.pdf");
|
||||
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
var recommendations = entityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entityLogEntry -> entityLogEntry.getEntryType().equals(EntryType.RECOMMENDATION))
|
||||
.map(EntityLogEntry::getValue)
|
||||
.toList();
|
||||
|
||||
assertThat(recommendations).containsExactlyInAnyOrder("Michael N.", "Funnarie B.", "Feuer A.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void acceptanceTests() throws IOException {
|
||||
|
||||
@ -148,7 +168,8 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
return redactionLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getType().equals(type))
|
||||
.filter(entry -> entry.getValue().equals(value)).filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0)));
|
||||
.filter(entry -> entry.getValue().equals(value))
|
||||
.filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0)));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -266,9 +266,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
@Test
|
||||
public void importedRedactionsTest() throws IOException {
|
||||
|
||||
ClassPathResource importedRedactionClasspathResource = new ClassPathResource("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactionClasspathResource.getInputStream());
|
||||
|
||||
ClassPathResource importedRedactionClasspathResource = new ClassPathResource(
|
||||
"files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
|
||||
importedRedactionClasspathResource.getInputStream());
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
|
||||
@ -11,81 +11,21 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.kie.api.runtime.KieSession;
|
||||
import org.kie.internal.io.ResourceFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService;
|
||||
|
||||
@Import(ManualChangesIntegrationTest.TestConfiguration.class)
|
||||
public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
private static final String RULES = "drools/manual_redaction_rules.drl";
|
||||
|
||||
@Autowired
|
||||
private EntityEnrichmentService entityEnrichmentService;
|
||||
private EntityCreationService entityCreationService;
|
||||
private KieSession kieSession;
|
||||
|
||||
@Qualifier("kieContainer")
|
||||
@Autowired
|
||||
private KieContainer kieContainer;
|
||||
|
||||
@Configuration
|
||||
@Import(BuildDocumentIntegrationTest.TestConfiguration.class)
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
kieFileSystem.write(ResourceFactory.newClassPathResource(RULES));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void createServices() {
|
||||
|
||||
kieSession = kieContainer.newKieSession();
|
||||
entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
|
||||
ManualChangesApplicationService manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.setGlobal("entityCreationService", entityCreationService);
|
||||
}
|
||||
import com.iqser.red.service.redaction.v1.server.rules.RulesIntegrationTest;
|
||||
|
||||
public class ManualChangesIntegrationTest extends RulesIntegrationTest {
|
||||
|
||||
@Test
|
||||
public void manualResizeRedactionTest() {
|
||||
@ -108,11 +48,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
.updateDictionary(false)
|
||||
.build();
|
||||
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.insert(manualResizeRedaction);
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
doAnalysis(document, List.of(manualResizeRedaction));
|
||||
|
||||
assertEquals(biggerEntity.getTextRange(), entity.getTextRange());
|
||||
assertEquals(biggerEntity.getDeepestFullyContainingNode(), entity.getDeepestFullyContainingNode());
|
||||
@ -141,11 +77,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
kieSession.insert(manualForceRedaction);
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
doAnalysis(document, List.of(manualForceRedaction));
|
||||
|
||||
assertEquals(Paragraph.class, entity.getDeepestFullyContainingNode().getClass());
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
@ -170,11 +102,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.APPROVED).requestDate(OffsetDateTime.now()).build();
|
||||
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.insert(idRemoval);
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
doAnalysis(document, List.of(idRemoval));
|
||||
|
||||
assertEquals("David Ksenia", entity.getValue());
|
||||
assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId());
|
||||
@ -199,12 +127,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.insert(idRemoval);
|
||||
kieSession.insert(manualForceRedaction);
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
doAnalysis(document, List.of(manualForceRedaction));
|
||||
|
||||
assertEquals(Paragraph.class, entity.getDeepestFullyContainingNode().getClass());
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
@ -227,11 +150,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
String initialId = entity.getPositionsOnPagePerPage().get(0).getId();
|
||||
IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).status(AnnotationStatus.REQUESTED).build();
|
||||
|
||||
kieSession.insert(idRemoval);
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
doAnalysis(document, List.of(idRemoval));
|
||||
|
||||
assertEquals(Paragraph.class, entity.getDeepestFullyContainingNode().getClass());
|
||||
assertFalse(entity.getIntersectingNodes().isEmpty());
|
||||
|
||||
@ -0,0 +1,28 @@
|
||||
package com.iqser.red.service.redaction.v1.server.rules;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
|
||||
public class Cbi11Test extends RulesIntegrationTest {
|
||||
|
||||
@Test
|
||||
public void multiAuthorNameSplittingTest() {
|
||||
|
||||
Document document = buildGraph("files/Minimal Examples/multipleAuthorsInTable.pdf");
|
||||
|
||||
doAnalysis(document, Collections.emptyList());
|
||||
|
||||
List<String> authorNames = document.getEntities().stream().map(Dictionary::splitIntoAuthorNames).flatMap(Collection::stream).toList();
|
||||
|
||||
assertThat(authorNames).containsExactlyInAnyOrder("Cargile", "N.L.", "Ross", "J.A.", "Egli", "Ramsteiner");
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,80 @@
|
||||
package com.iqser.red.service.redaction.v1.server.rules;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.kie.api.runtime.KieSession;
|
||||
import org.kie.internal.io.ResourceFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService;
|
||||
|
||||
public class RulesIntegrationTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
protected static final String RULES = "drools/rules.drl";
|
||||
|
||||
@Autowired
|
||||
protected EntityEnrichmentService entityEnrichmentService;
|
||||
protected EntityCreationService entityCreationService;
|
||||
protected KieSession kieSession;
|
||||
|
||||
@Qualifier("kieContainer")
|
||||
@Autowired
|
||||
private KieContainer kieContainer;
|
||||
|
||||
|
||||
protected void doAnalysis(Document document, Collection<Object> objectToInsert) {
|
||||
|
||||
kieSession.insert(document);
|
||||
document.streamAllSubNodes().forEach(kieSession::insert);
|
||||
objectToInsert.forEach(kieSession::insert);
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
}
|
||||
|
||||
|
||||
@Configuration
|
||||
@Import(BuildDocumentIntegrationTest.TestConfiguration.class)
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
kieFileSystem.write(ResourceFactory.newClassPathResource(RULES));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void createServices() {
|
||||
|
||||
kieSession = kieContainer.newKieSession();
|
||||
entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
|
||||
ManualChangesApplicationService manualChangesApplicationService = new ManualChangesApplicationService(entityCreationService);
|
||||
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
|
||||
kieSession.setGlobal("entityCreationService", entityCreationService);
|
||||
}
|
||||
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user