From f4ea236fc587df34eda20ead5762737bca447f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Thu, 11 Mar 2021 13:53:16 +0100 Subject: [PATCH] Fixed reanalysis for case-insensitive dictionary entries --- .../redaction/model/DictionaryIncrement.java | 2 +- .../model/DictionaryIncrementValue.java | 13 +++++++ .../redaction/service/DictionaryService.java | 5 +-- .../redaction/service/ReanalyzeService.java | 5 ++- .../redaction/utils/EntitySearchUtils.java | 34 +++++++++++++++++++ .../v1/server/RedactionIntegrationTest.java | 10 ++++-- 6 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrementValue.java diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java index 86362741..2366527e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java @@ -9,7 +9,7 @@ import lombok.Data; @AllArgsConstructor public class DictionaryIncrement { - private Set values; + private Set values; private long dictionaryVersion; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrementValue.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrementValue.java new file mode 100644 index 00000000..ba762ed5 --- /dev/null +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrementValue.java @@ -0,0 +1,13 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import lombok.AllArgsConstructor; +import lombok.Data; + +@Data +@AllArgsConstructor +public class DictionaryIncrementValue { + + private String value; + private boolean caseinsensitive; + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index e622a013..1d44cdf3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -22,6 +22,7 @@ import com.iqser.red.service.configuration.v1.api.model.TypeResult; import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation; @@ -57,12 +58,12 @@ public class DictionaryService { long version = updateDictionary(ruleSetId); - Set newValues = new HashSet<>(); + Set newValues = new HashSet<>(); List dictionaryModels = dictionariesByRuleSets.get(ruleSetId).getDictionary(); dictionaryModels.forEach(dictionaryModel -> { 
dictionaryModel.getEntries().forEach(dictionaryEntry -> { if (dictionaryEntry.getVersion() > fromVersion) { - newValues.add(dictionaryEntry.getValue()); + newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive())); } }); }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java index f868490a..b4ca2172 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java @@ -82,9 +82,8 @@ public class ReanalyzeService { } for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) { - Set entities = EntitySearchUtils.find(sectionText.getText(), dictionaryIncrement.getValues(), "find", sectionText - .getHeadline(), sectionText.getSectionNumber(), false); - if (!entities.isEmpty()) { + + if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) { sectionsToReanaylse.add(sectionText.getSectionNumber()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 2825232b..f7fffaa4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java
@@ -5,12 +5,14 @@ import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
+import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
 import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
 import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
 import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
@@ -20,8 +22,82 @@ import lombok.extern.slf4j.Slf4j;
 
 @Slf4j
 @UtilityClass
+@SuppressWarnings("PMD")
 public class EntitySearchUtils {
 
+
+    /**
+     * Reports whether the section text contains at least one of the given dictionary values as a delimited term.
+     * <p>
+     * Values are trimmed first, and values of two characters or fewer after trimming are skipped. An occurrence only
+     * counts when it starts at the beginning of the text or directly after a whitespace/separator character, and ends
+     * at the end of the text or directly before a separator character (as decided by {@code isSeparator}).
+     * Case-insensitive values are compared on {@link Locale#ROOT} lower-cased text; all other values must match the
+     * original text exactly.
+     *
+     * @param sectionText text of the section to scan; {@code null} or empty yields {@code false}
+     * @param values dictionary values to look for; {@code null} or empty yields {@code false}
+     * @return {@code true} as soon as one value is found as a delimited term, otherwise {@code false}
+     */
+    public boolean sectionContainsAny(String sectionText, Set<DictionaryIncrementValue> values) {
+
+        if (sectionText == null || sectionText.isEmpty() || values == null || values.isEmpty()) {
+            return false;
+        }
+
+        // Only case-insensitive values are searched in the lower-cased copy. Lower-casing can change the length of a
+        // string, so its indices must never be used to address the original text.
+        String lowerCaseText = sectionText.toLowerCase(Locale.ROOT);
+
+        for (DictionaryIncrementValue value : values) {
+
+            if (value == null || value.getValue() == null) {
+                continue;
+            }
+
+            String term = value.getValue().trim();
+            if (term.length() <= 2) {
+                continue;
+            }
+
+            boolean found = value.isCaseinsensitive()
+                    ? containsDelimitedTerm(lowerCaseText, term.toLowerCase(Locale.ROOT))
+                    : containsDelimitedTerm(sectionText, term);
+            if (found) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Searches {@code text} for an occurrence of {@code term} that is delimited on both sides.
+     *
+     * @param text text to search, compared as-is
+     * @param term non-empty term to look for, compared as-is
+     * @return {@code true} if a delimited occurrence exists
+     */
+    private boolean containsDelimitedTerm(String text, String term) {
+
+        int startIndex = text.indexOf(term);
+        while (startIndex > -1) {
+            int stopIndex = startIndex + term.length();
+            boolean startsAtBoundary = startIndex == 0
+                    || Character.isWhitespace(text.charAt(startIndex - 1))
+                    || isSeparator(text.charAt(startIndex - 1));
+            boolean endsAtBoundary = stopIndex == text.length() || isSeparator(text.charAt(stopIndex));
+            if (startsAtBoundary && endsAtBoundary) {
+                return true;
+            }
+            // Resume one character after this start (not after its end) so that an occurrence overlapping a rejected
+            // candidate is still examined.
+            startIndex = text.indexOf(term, startIndex + 1);
+        }
+        return false;
+    }
+
+
 
     public Set find(String inputString, Set values, String type, String headline, int sectionNumber, boolean local) {
 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index f971651c..84a0e1b8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -498,7 +498,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_13_Volume_3CP_A9396G_B-1_2018-09-06.pdf"); AnalyzeRequest request = AnalyzeRequest.builder() .ruleSetId(TEST_RULESET_ID) @@ -543,8 +543,12 @@ public class RedactionIntegrationTest { dictionary.get(AUTHOR).add("physical"); reanlysisVersions.put("physical", 2L); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(2L); - when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR)); + dictionary.get(VERTEBRATE).add("s-metolachlor"); + reanlysisVersions.put("s-metolachlor", 3L); + + when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(3L); + + when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE)); start = System.currentTimeMillis(); ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()