RED-9466 - Adding annotation removes all AI-based recommendations until forced re-analysis

This commit is contained in:
Corina Olariu 2024-06-28 15:49:25 +02:00
parent 1b701a67c0
commit 5f52df1ebe
5 changed files with 110 additions and 9 deletions

View File

@ -383,7 +383,7 @@ public class AnalyzeService {
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet() return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
.stream() // .stream() //
.filter(entry -> sectionsToReanalyseIds.contains(entry.getKey())) // .filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
} }
@ -399,4 +399,11 @@ public class AnalyzeService {
return nerEntities; return nerEntities;
} }
/**
 * Resolves the top-level ("super") section id of a section number.
 *
 * <p>Delegates the parsing to {@link NerEntitiesAdapter#sectionNumberToTreeId(String)} and
 * returns the first element of the resulting tree id.
 *
 * @param section the section number string to resolve
 * @return the first entry of the tree id derived from {@code section}
 */
private static Integer getSuperSectionID(String section) {

    var treeId = NerEntitiesAdapter.sectionNumberToTreeId(section);
    // NOTE(review): get(0) assumes the adapter never returns an empty list - confirm.
    return treeId.get(0);
}
} }

View File

@ -177,7 +177,7 @@ public class NerEntitiesAdapter {
} }
private static List<Integer> sectionNumberToTreeId(String sectionNumber) { public static List<Integer> sectionNumberToTreeId(String sectionNumber) {
return Arrays.stream(sectionNumber.split("\\.")) return Arrays.stream(sectionNumber.split("\\."))
.map(Integer::parseInt) .map(Integer::parseInt)

View File

@ -115,13 +115,14 @@ public abstract class AbstractRedactionIntegrationTest {
public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID+ ":" + TEST_DOSSIER_ID; public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID + ":" + TEST_DOSSIER_ID;
public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String DOSSIER_AUTHOR_TYPE_ID = AUTHOR_TYPE_ID + ":" + TEST_DOSSIER_ID;
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
@ -254,8 +255,10 @@ public abstract class AbstractRedactionIntegrationTest {
true)); true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR, when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
true)); true));
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR, when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
true)); PUBLISHED_INFORMATION_INDICATOR,
true));
when(dictionaryClient.getDictionaryForType(DOSSIER_AUTHOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, true));
} }
@ -354,6 +357,7 @@ public abstract class AbstractRedactionIntegrationTest {
.collect(Collectors.toSet())); .collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>()); dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>()); dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>());
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>()) falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt") .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")

View File

@ -10,6 +10,7 @@ import java.time.OffsetDateTime;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.UUID;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
@ -25,24 +26,30 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType; import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary; import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension; import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService; import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService; import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
@ -50,6 +57,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration; import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext; import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class) @ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class) @Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
@ -103,6 +112,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build(), .build(),
Type.builder()
.id(DOSSIER_AUTHOR_TYPE_ID)
.type(DICTIONARY_AUTHOR)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.hexColor("#ffe184")
.isHint(hintTypeMap.get(DICTIONARY_AUTHOR))
.isCaseInsensitive(caseInSensitiveMap.get(DICTIONARY_AUTHOR))
.isRecommendation(recommendationTypeMap.get(DICTIONARY_AUTHOR))
.rank(rankTypeMap.get(DICTIONARY_AUTHOR))
.build(),
Type.builder() Type.builder()
.id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID) .id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID)
.type(PUBLISHED_INFORMATION_INDICATOR) .type(PUBLISHED_INFORMATION_INDICATOR)
@ -158,10 +178,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst() var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow(); .orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:")); assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst() var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow(); .orElseThrow();
assertThat(asyaLyon1.getSection().startsWith("Paragraph:")); assertThat(asyaLyon1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState()); assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId()); var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@ -212,10 +232,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst() var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow(); .orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:")); assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst() var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow(); .orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:")); assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState()); assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId()); var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@ -365,6 +385,75 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
} }
/**
 * Verifies that an entity found by the NER engine is still present, and still attributed to
 * {@link Engine#NER}, after a re-analysis that was triggered by a manual redaction which is
 * also added to the dossier dictionary.
 *
 * <p>Flow: store the NER entities file, run a full analysis, assert the NER entity exists,
 * add a manual redaction with {@code addToDossierDictionary(true)}, bump the mocked dossier
 * dictionary version, re-analyze, then assert that the manual entry is
 * {@link EntryState#APPLIED} and the NER entity was not lost.
 */
@Test
@SneakyThrows
void testNerEntitiesAfterReanalysis() {

    final String nerValue = "Osip S.";
    final String dictionaryAddValue = "cooperation";

    String efsaSanitisationRules = loadFromClassPath("drools/efsa_sanitisation.drl");
    when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(efsaSanitisationRules));

    // Make the pre-computed NER result available under the storage id used for NER_ENTITIES files.
    ClassPathResource responseJson = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.NER_ENTITIES.json");
    storageService.storeObject(TenantContext.getTenantId(),
                               RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
                               responseJson.getInputStream());

    String pdfFile = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.pdf";
    AnalyzeRequest request = uploadFileToStorage(pdfFile);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);

    // Initial analysis with an empty dossier-level author dictionary.
    request.setAnalysisNumber(1);
    dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
    mockDictionaryCalls(0L);
    analyzeService.analyze(request);

    var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
    var nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
            .orElseThrow();
    assertThat(nerEntity.getEngines()).contains(Engine.NER);

    // NOTE(review): the original builder chain called .type(DICTIONARY_AUTHOR) and then
    // .type("dossier_redaction"); only the last call can take effect, so the overridden call
    // was dropped. Confirm that "dossier_redaction" is the intended type.
    ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
            .value(dictionaryAddValue)
            .user("user")
            .addToDossierDictionary(true)
            .positions(List.of(Rectangle.builder().topLeftX(180.748f).topLeftY(546.564f).width(56.592f).height(15.408f).page(1).build()))
            .type("dossier_redaction")
            .fileId(TEST_FILE_ID)
            .requestDate(OffsetDateTime.now())
            .annotationId(UUID.randomUUID().toString())
            .build();

    request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build());
    request.setAnalysisNumber(2);

    try {
        // Simulate the dictionary change that accompanies the manual redaction.
        dossierDictionary.get(DICTIONARY_AUTHOR).add(dictionaryAddValue);
        reanlysisVersions.put(dictionaryAddValue, 2L);
        when(dictionaryClient.getVersionForDossier(TEST_DOSSIER_ID)).thenReturn(2L);
        mockDictionaryCalls(1L);

        analyzeService.reanalyze(request);

        entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);

        EntityLogEntry entityLogEntryAdded = entityLog.getEntityLogEntry()
                .stream()
                .filter(entityLogEntry -> entityLogEntry.getValue().equals(dictionaryAddValue))
                .findFirst()
                .orElseThrow();
        assertEquals(EntryState.APPLIED, entityLogEntryAdded.getState());

        // The NER-based entity must survive the re-analysis.
        nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
                .orElseThrow();
        assertThat(nerEntity.getEngines()).contains(Engine.NER);
    } finally {
        // Always undo the dictionary change, even when an assertion fails, so the added value
        // does not leak into other tests through the dossierDictionary map.
        dossierDictionary.get(DICTIONARY_AUTHOR).remove(dictionaryAddValue);
    }
}
private static IdRemoval buildIdRemoval(String id) { private static IdRemoval buildIdRemoval(String id) {
return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build(); return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build();