RED-9466 - Adding annotation removes all AI based recommendations until forced re-analysis #453

Open
corina.olariu.ext1 wants to merge 2 commits from RED-9466-fp into master
5 changed files with 110 additions and 9 deletions

View File

@ -383,7 +383,7 @@ public class AnalyzeService {
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
.stream() //
.filter(entry -> sectionsToReanalyseIds.contains(entry.getKey())) //
.filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
}
@ -399,4 +399,11 @@ public class AnalyzeService {
return nerEntities;
}
/**
 * Resolves the id of the top-level (super) section containing the given
 * section number, i.e. the first component of its tree id (e.g. "3.1.2" -> 3).
 * Used when filtering NER entities so that entries belonging to a
 * re-analysed super-section are retained as well.
 */
private static Integer getSuperSectionID(String sectionNumber) {
    return NerEntitiesAdapter.sectionNumberToTreeId(sectionNumber).get(0);
}
}

View File

@ -177,7 +177,7 @@ public class NerEntitiesAdapter {
}
private static List<Integer> sectionNumberToTreeId(String sectionNumber) {
public static List<Integer> sectionNumberToTreeId(String sectionNumber) {
return Arrays.stream(sectionNumber.split("\\."))
.map(Integer::parseInt)

View File

@ -122,6 +122,7 @@ public abstract class AbstractRedactionIntegrationTest {
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String DOSSIER_AUTHOR_TYPE_ID = AUTHOR_TYPE_ID + ":" + TEST_DOSSIER_ID;
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
@ -254,8 +255,10 @@ public abstract class AbstractRedactionIntegrationTest {
true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
true));
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR,
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
PUBLISHED_INFORMATION_INDICATOR,
true));
when(dictionaryClient.getDictionaryForType(DOSSIER_AUTHOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, true));
}
@ -354,6 +357,7 @@ public abstract class AbstractRedactionIntegrationTest {
.collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>());
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")

View File

@ -10,6 +10,7 @@ import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Stream;
import org.junit.jupiter.api.BeforeEach;
@ -25,24 +26,30 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
@ -50,6 +57,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
@ -103,6 +112,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build(),
Type.builder()
.id(DOSSIER_AUTHOR_TYPE_ID)
.type(DICTIONARY_AUTHOR)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.hexColor("#ffe184")
.isHint(hintTypeMap.get(DICTIONARY_AUTHOR))
.isCaseInsensitive(caseInSensitiveMap.get(DICTIONARY_AUTHOR))
.isRecommendation(recommendationTypeMap.get(DICTIONARY_AUTHOR))
.rank(rankTypeMap.get(DICTIONARY_AUTHOR))
.build(),
Type.builder()
.id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID)
.type(PUBLISHED_INFORMATION_INDICATOR)
@ -158,10 +178,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow();
assertThat(asyaLyon1.getSection().startsWith("Paragraph:"));
assertThat(asyaLyon1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@ -212,10 +232,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
.orElseThrow();
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@ -365,6 +385,75 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
}
@Test
@SneakyThrows
// Regression test for RED-9466: adding a manual annotation (with
// addToDossierDictionary=true) must not drop the AI/NER based
// recommendations on the subsequent reanalysis.
void testNerEntitiesAfterReanalysis() {
// NOTE(review): local variable in UPPER_SNAKE_CASE — convention reserves
// that for constants; a lowerCamelCase name would be idiomatic.
String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));
// Pre-seed storage with a canned NER_ENTITIES result so the analysis can
// pick up NER entities without running a real NER engine.
ClassPathResource responseJson = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.NER_ENTITIES.json");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
String pdfFile = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.pdf";
AnalyzeRequest request = uploadFileToStorage(pdfFile);
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
// First (full) analysis: empty dossier dictionary for the author type.
request.setAnalysisNumber(1);
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
mockDictionaryCalls(0L);
analyzeService.analyze(request);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
// Sanity check: the NER-sourced author entity is present after the
// initial analysis.
String nerValue = "Osip S.";
var nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
.orElseThrow();
assertThat(nerEntity.getEngines()).contains(Engine.NER);
// Simulate the user adding a manual redaction whose value is also pushed
// into the dossier dictionary — the trigger for the RED-9466 bug.
String dictionaryAddValue = "cooperation";
ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
.value(dictionaryAddValue)
.type(DICTIONARY_AUTHOR)
.user("user")
.addToDossierDictionary(true)
.positions(List.of(Rectangle.builder().topLeftX(180.748f).topLeftY(546.564f).width(56.592f).height(15.408f).page(1).build()))
// NOTE(review): .type(...) is set twice on this builder — this second
// call silently overrides .type(DICTIONARY_AUTHOR) above, so the entry
// is built with type "dossier_redaction". Confirm which value is
// intended and remove the dead call.
.type("dossier_redaction")
.fileId(TEST_FILE_ID)
.requestDate(OffsetDateTime.now())
.annotationId(UUID.randomUUID().toString())
.build();
request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build());
// Second pass: bump the analysis number and the dictionary version so the
// service sees a changed dossier dictionary and performs a reanalysis.
request.setAnalysisNumber(2);
dossierDictionary.get(DICTIONARY_AUTHOR).add(dictionaryAddValue);
// NOTE(review): "reanlysisVersions" is a misspelling of
// "reanalysisVersions" — the field is declared elsewhere, so renaming it
// must happen at the declaration site, not here.
reanlysisVersions.put(dictionaryAddValue, 2L);
when(dictionaryClient.getVersionForDossier(TEST_DOSSIER_ID)).thenReturn(2L);
mockDictionaryCalls(1L);
// NOTE(review): reanalyzeResult is never asserted on in this method —
// either assert something about it or drop the local.
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
EntityLogEntry entityLogEntryAdded = entityLog.getEntityLogEntry()
.stream()
.filter(entityLogEntry -> entityLogEntry.getValue().equals(dictionaryAddValue))
.findFirst()
// NOTE(review): unchecked Optional.get() — prefer .orElseThrow() for a
// clearer failure, as done for the lookups above.
.get();
// The manually added dictionary value must be applied after reanalysis...
assertEquals(EntryState.APPLIED, entityLogEntryAdded.getState());
// ...and — the actual RED-9466 fix — the NER-based recommendation must
// still be present and still attributed to the NER engine.
nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
.orElseThrow();
assertThat(nerEntity.getEngines()).contains(Engine.NER);
// Clean up the shared mutable fixture so other tests are unaffected.
dossierDictionary.get(DICTIONARY_AUTHOR).remove(dictionaryAddValue);
}
private static IdRemoval buildIdRemoval(String id) {
return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build();