diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index d57156bd..752034da 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -1,6 +1,5 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -20,9 +19,8 @@ import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization; import com.iqser.red.service.redaction.v1.model.Status; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity; import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; @@ -77,7 +75,8 @@ public class EntityRedactionService { for (SectionText reanalysisSection : reanalysisSections) { Set entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection - .getSectionNumber(), dictionary, local, nerEntities); + .getSectionNumber(), dictionary, local, nerEntities, reanalysisSection.getCellStarts()); + if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) { surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection .getCellStarts()); @@ -199,7 +198,8 @@ public class EntityRedactionService { private Set findEntities(SearchableText searchableText, String headline, int sectionNumber, - Dictionary dictionary, boolean local, NerEntities nerEntities) { + Dictionary dictionary, boolean local, NerEntities nerEntities, + List cellstarts) { Set found = new HashSet<>(); String searchableString = searchableText.toString(); @@ -210,30 +210,49 @@ public class EntityRedactionService { String lowercaseInputString = searchableString.toLowerCase(); for (DictionaryModel model : dictionary.getDictionaryModels()) { if (model.isCaseInsensitive()) { - found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, local, model + found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model .isDossierDictionary())); } else { - found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, local, model + found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model .isDossierDictionary())); } } if (!local) { - addNerEntities(found, sectionNumber, headline, nerEntities); + Map> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts); + nerValuesPerType.entrySet().forEach(entry -> { + found.addAll(EntitySearchUtils.find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber, false, false)); + }); } return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary); } - private void addNerEntities(Set found, int sectionNumber, String headline, NerEntities nerEntities) { + private Map> getNerValues(int sectionNumber, NerEntities nerEntities, + List cellstarts) { + + Map> nerValuesPerType = new HashMap<>(); if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult() .containsKey(sectionNumber)) { nerEntities.getResult().get(sectionNumber).forEach(res -> { - found.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false)); + if (cellstarts == null || cellstarts.isEmpty()) { + nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes()))); + } else { + boolean intersectsCellStart = false; + for (Integer cellStart : cellstarts) { + if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) { + intersectsCellStart = true; + } + } + if (!intersectsCellStart) { + nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes()))); + } + } }); } + return nerValuesPerType; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 0daca578..14bee48b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -47,7 +47,7 @@ public class EntitySearchUtils { public Set find(String inputString, Set values, String type, String headline, int sectionNumber, - boolean local, boolean isDossierDictionary) { + boolean isDictionaryEntry, boolean isDossierDictionary) { Set found = new HashSet<>(); @@ -67,7 +67,7 @@ public class EntitySearchUtils { if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString .charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { - found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local, isDossierDictionary)); + found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary)); } } while (startIndex > -1); }