Merge branch 'RED-9466-fp' into 'master'
RED-9466 - Adding annotation removes all AI-based recommendations until forced re-analysis. Closes RED-9466. See merge request redactmanager/redaction-service!453
This commit is contained in:
commit
c279f54295
@ -383,7 +383,7 @@ public class AnalyzeService {
|
||||
|
||||
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
|
||||
.stream() //
|
||||
.filter(entry -> sectionsToReanalyseIds.contains(entry.getKey())) //
|
||||
.filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
|
||||
}
|
||||
|
||||
@ -399,4 +399,11 @@ public class AnalyzeService {
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
private static Integer getSuperSectionID(String section) {
|
||||
|
||||
return NerEntitiesAdapter.sectionNumberToTreeId(section)
|
||||
.get(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -177,7 +177,7 @@ public class NerEntitiesAdapter {
|
||||
}
|
||||
|
||||
|
||||
private static List<Integer> sectionNumberToTreeId(String sectionNumber) {
|
||||
public static List<Integer> sectionNumberToTreeId(String sectionNumber) {
|
||||
|
||||
return Arrays.stream(sectionNumber.split("\\."))
|
||||
.map(Integer::parseInt)
|
||||
|
||||
@ -115,13 +115,14 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID+ ":" + TEST_DOSSIER_ID;
|
||||
public static final String DOSSIER_PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_TYPE_ID + ":" + TEST_DOSSIER_ID;
|
||||
public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String DOSSIER_AUTHOR_TYPE_ID = AUTHOR_TYPE_ID + ":" + TEST_DOSSIER_ID;
|
||||
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
|
||||
@ -254,8 +255,10 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
true));
|
||||
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,
|
||||
true));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR,
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
|
||||
PUBLISHED_INFORMATION_INDICATOR,
|
||||
true));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_AUTHOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, true));
|
||||
|
||||
}
|
||||
|
||||
@ -354,6 +357,7 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
.collect(Collectors.toSet()));
|
||||
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
|
||||
dossierDictionary.put(PUBLISHED_INFORMATION_INDICATOR, new ArrayList<>());
|
||||
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
|
||||
|
||||
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
|
||||
|
||||
@ -10,6 +10,7 @@ import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@ -25,24 +26,30 @@ import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.FilterType;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
|
||||
@ -50,6 +57,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
|
||||
@ -103,6 +112,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build(),
|
||||
Type.builder()
|
||||
.id(DOSSIER_AUTHOR_TYPE_ID)
|
||||
.type(DICTIONARY_AUTHOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe184")
|
||||
.isHint(hintTypeMap.get(DICTIONARY_AUTHOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DICTIONARY_AUTHOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DICTIONARY_AUTHOR))
|
||||
.rank(rankTypeMap.get(DICTIONARY_AUTHOR))
|
||||
.build(),
|
||||
Type.builder()
|
||||
.id(DOSSIER_PUBLISHED_INFORMATION_TYPE_ID)
|
||||
.type(PUBLISHED_INFORMATION_INDICATOR)
|
||||
@ -158,10 +178,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
||||
.orElseThrow();
|
||||
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
|
||||
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
|
||||
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
|
||||
.orElseThrow();
|
||||
assertThat(asyaLyon1.getSection().startsWith("Paragraph:"));
|
||||
assertThat(asyaLyon1.getSection()).startsWith("Paragraph:");
|
||||
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
|
||||
|
||||
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
|
||||
@ -212,10 +232,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst()
|
||||
.orElseThrow();
|
||||
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
|
||||
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
|
||||
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst()
|
||||
.orElseThrow();
|
||||
assertThat(publishedInformationEntry1.getSection().startsWith("Paragraph:"));
|
||||
assertThat(publishedInformationEntry1.getSection()).startsWith("Paragraph:");
|
||||
assertEquals(EntryState.SKIPPED, asyaLyon1.getState());
|
||||
|
||||
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
|
||||
@ -365,6 +385,75 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
void testNerEntitiesAfterReanalysis() {
|
||||
|
||||
String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));
|
||||
|
||||
ClassPathResource responseJson = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.NER_ENTITIES.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||
responseJson.getInputStream());
|
||||
|
||||
String pdfFile = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 3.pdf";
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
request.setAnalysisNumber(1);
|
||||
dossierDictionary.put(DICTIONARY_AUTHOR, new ArrayList<>());
|
||||
mockDictionaryCalls(0L);
|
||||
|
||||
analyzeService.analyze(request);
|
||||
|
||||
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
String nerValue = "Osip S.";
|
||||
var nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
|
||||
.orElseThrow();
|
||||
assertThat(nerEntity.getEngines()).contains(Engine.NER);
|
||||
|
||||
String dictionaryAddValue = "cooperation";
|
||||
ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder()
|
||||
.value(dictionaryAddValue)
|
||||
.type(DICTIONARY_AUTHOR)
|
||||
.user("user")
|
||||
.addToDossierDictionary(true)
|
||||
.positions(List.of(Rectangle.builder().topLeftX(180.748f).topLeftY(546.564f).width(56.592f).height(15.408f).page(1).build()))
|
||||
.type("dossier_redaction")
|
||||
.fileId(TEST_FILE_ID)
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.annotationId(UUID.randomUUID().toString())
|
||||
.build();
|
||||
request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build());
|
||||
|
||||
request.setAnalysisNumber(2);
|
||||
dossierDictionary.get(DICTIONARY_AUTHOR).add(dictionaryAddValue);
|
||||
reanlysisVersions.put(dictionaryAddValue, 2L);
|
||||
when(dictionaryClient.getVersionForDossier(TEST_DOSSIER_ID)).thenReturn(2L);
|
||||
mockDictionaryCalls(1L);
|
||||
|
||||
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
|
||||
|
||||
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
EntityLogEntry entityLogEntryAdded = entityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entityLogEntry -> entityLogEntry.getValue().equals(dictionaryAddValue))
|
||||
.findFirst()
|
||||
.get();
|
||||
assertEquals(EntryState.APPLIED, entityLogEntryAdded.getState());
|
||||
|
||||
nerEntity = findEntityByTypeAndValue(entityLog, DICTIONARY_AUTHOR, nerValue).findFirst()
|
||||
.orElseThrow();
|
||||
assertThat(nerEntity.getEngines()).contains(Engine.NER);
|
||||
dossierDictionary.get(DICTIONARY_AUTHOR).remove(dictionaryAddValue);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static IdRemoval buildIdRemoval(String id) {
|
||||
|
||||
return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build();
|
||||
|
||||
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user