Fixed reanalysis for case-insensitive dictionary entries
This commit is contained in:
parent
511092b9e7
commit
f4ea236fc5
@ -9,7 +9,7 @@ import lombok.Data;
|
||||
@AllArgsConstructor
|
||||
public class DictionaryIncrement {
|
||||
|
||||
private Set<String> values;
|
||||
private Set<DictionaryIncrementValue> values;
|
||||
private long dictionaryVersion;
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class DictionaryIncrementValue {
|
||||
|
||||
private String value;
|
||||
private boolean caseinsensitive;
|
||||
|
||||
}
|
||||
@ -22,6 +22,7 @@ import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
|
||||
|
||||
@ -57,12 +58,12 @@ public class DictionaryService {
|
||||
|
||||
long version = updateDictionary(ruleSetId);
|
||||
|
||||
Set<String> newValues = new HashSet<>();
|
||||
Set<DictionaryIncrementValue> newValues = new HashSet<>();
|
||||
List<DictionaryModel> dictionaryModels = dictionariesByRuleSets.get(ruleSetId).getDictionary();
|
||||
dictionaryModels.forEach(dictionaryModel -> {
|
||||
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion) {
|
||||
newValues.add(dictionaryEntry.getValue());
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@ -82,9 +82,8 @@ public class ReanalyzeService {
|
||||
}
|
||||
|
||||
for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
|
||||
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), dictionaryIncrement.getValues(), "find", sectionText
|
||||
.getHeadline(), sectionText.getSectionNumber(), false);
|
||||
if (!entities.isEmpty()) {
|
||||
|
||||
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) {
|
||||
sectionsToReanaylse.add(sectionText.getSectionNumber());
|
||||
}
|
||||
|
||||
|
||||
@ -5,12 +5,14 @@ import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
@ -20,8 +22,40 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
@SuppressWarnings("PMD")
|
||||
public class EntitySearchUtils {
|
||||
|
||||
|
||||
public boolean sectionContainsAny(String sectionText, Set<DictionaryIncrementValue> values) {
|
||||
|
||||
String inputString = sectionText.toLowerCase(Locale.ROOT);
|
||||
|
||||
for (DictionaryIncrementValue value : values) {
|
||||
|
||||
String cleanValue = value.getValue().toLowerCase(Locale.ROOT).trim();
|
||||
|
||||
if (cleanValue.length() <= 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = inputString.indexOf(cleanValue, stopIndex);
|
||||
stopIndex = startIndex + cleanValue.length();
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
if(value.isCaseinsensitive() || !value.isCaseinsensitive() && sectionText.substring(startIndex, stopIndex).equals(value.getValue())){
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
|
||||
boolean local) {
|
||||
|
||||
|
||||
@ -498,7 +498,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_13_Volume_3CP_A9396G_B-1_2018-09-06.pdf");
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
@ -543,8 +543,12 @@ public class RedactionIntegrationTest {
|
||||
dictionary.get(AUTHOR).add("physical");
|
||||
reanlysisVersions.put("physical", 2L);
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(2L);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR));
|
||||
dictionary.get(VERTEBRATE).add("s-metolachlor");
|
||||
reanlysisVersions.put("s-metolachlor", 3L);
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(3L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user