RED-8670: add idp fields and llm tracking

This commit is contained in:
Kilian Schuettler 2024-12-17 16:53:04 +01:00
parent a03ab9f105
commit ce3da37a8b
13 changed files with 44 additions and 30 deletions

View File

@ -40,7 +40,7 @@ dependencies {
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
}
api("com.knecon.fforesight:azure-ocr-service-api:0.24.0")
implementation("com.knecon.fforesight:llm-service-api:1.17.0")
implementation("com.knecon.fforesight:llm-service-api:1.35.0")
api("com.knecon.fforesight:jobs-commons:0.10.0")
api("com.iqser.red.commons:storage-commons:2.50.0")
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {

View File

@ -154,6 +154,8 @@ public class DossierTemplateEntity {
dossierTemplateCopy.removeWatermark = dossierTemplateEntity.removeWatermark;
dossierTemplateCopy.downloadFileTypes = dossierTemplateEntity.downloadFileTypes;
dossierTemplateCopy.layoutParsingType = dossierTemplateEntity.layoutParsingType;
dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault;
dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault;
return dossierTemplateCopy;
}

View File

@ -158,9 +158,9 @@ public class FileEntity {
@Column(name = "ocr_end_time")
private OffsetDateTime ocrEndTime;
private Integer usedPromptTokens;
private int usedPromptTokens;
private Integer usedCompletionTokens;
private int usedCompletionTokens;
@Column
private boolean hasAnnotationComments;

View File

@ -352,7 +352,6 @@ public class FileStatusService {
return;
}
boolean forceAnalysis = false;
if (settings.isLlmNerServiceEnabled()) {
boolean objectExists = fileManagementStorageService.objectExists(dossierId, fileId, FileType.LLM_NER_ENTITIES);
@ -386,7 +385,7 @@ public class FileStatusService {
boolean reanalyse = fileModel.isReanalysisRequired() || analysisType.equals(AnalysisType.MANUAL_REDACTION_REANALYZE);
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
if(analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
if (analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
messageType = MessageType.ANALYSE;
}
@ -837,7 +836,7 @@ public class FileStatusService {
fileStatusPersistenceService.updateWorkflowStatus(fileId, newWorkflowStatus, false);
if(oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
if (oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
fileStatusPersistenceService.clearLastDownload(fileId);
}
}
@ -980,6 +979,13 @@ public class FileStatusService {
/**
 * Backward-compatible overload of {@code setStatusFullReprocess} for callers that do not pass an
 * {@code idp} flag.
 * <p>
 * All arguments are forwarded unchanged to the six-argument overload, with {@code idp} fixed to
 * {@code false}.
 *
 * @param dossierId                 forwarded as-is to the six-argument overload
 * @param fileId                    forwarded as-is to the six-argument overload
 * @param priority                  forwarded as-is to the six-argument overload
 * @param requiresStructureAnalysis forwarded as-is to the six-argument overload
 * @param runOcr                    forwarded as-is to the six-argument overload
 */
@Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr) {
setStatusFullReprocess(dossierId, fileId, priority, requiresStructureAnalysis, runOcr, false);
}
@Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr, boolean idp) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
if (fileStatus.isExcluded()) {
@ -987,14 +993,14 @@ public class FileStatusService {
return;
}
if (requiresStructureAnalysis || runOcr) {
if (requiresStructureAnalysis || runOcr || idp) {
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
fileManagementStorageService.deleteDocumentAndNerObjects(dossierId, fileId);
}
if (runOcr) {
if (runOcr || idp) {
fileStatusPersistenceService.resetOcrStartAndEndDate(fileId);
setStatusOcrQueued(dossierId, fileId);
setStatusOcrQueued(dossierId, fileId, idp);
return;
}
@ -1081,6 +1087,7 @@ public class FileStatusService {
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), AnalysisType.DEFAULT);
}
@Transactional
public void setStatusForceAnalyse(String dossierId, String fileId, boolean priority) {

View File

@ -291,7 +291,8 @@ public class ReanalysisService {
file.getId(),
false,
reanalysisSettings.isRepeatStructureAnalysis(),
reanalysisSettings.runOcr()));
reanalysisSettings.isRunOcr(),
reanalysisSettings.isRunIdp()));
return rejectedFiles;
}

View File

@ -413,7 +413,7 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Modifying(clearAutomatically = true)
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL where f.id = :fileId")
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL, f.numberOfIdpPages = NULL where f.id = :fileId")
void resetOcrStartAndEndDate(@Param("fileId") String fileId);

View File

@ -260,6 +260,6 @@ databaseChangeLog:
- include:
file: db/changelog/tenant/159-cleanup-truncated-indices.yaml
- include:
file: db/changelog/tenant/151.0.0-add-usage-fields-to-file-for-idp-and-llm.yaml
file: db/changelog/tenant/159.0.0-add-usage-fields-to-file-for-idp-and-llm.yaml
- include:
file: db/changelog/tenant/152.0.0-add-idp-related-fields-to-dossier-template.yaml
file: db/changelog/tenant/160.0.0-add-idp-related-fields-to-dossier-template.yaml

View File

@ -9,18 +9,16 @@ databaseChangeLog:
- column:
name: used_completion_tokens
type: int
defaultValueNumeric: 0
defaultValueNumeric: "0"
constraints:
nullable: false
- column:
name: used_prompt_tokens
type: int
defaultValueNumeric: 0
defaultValueNumeric: "0"
constraints:
nullable: false
- column:
name: number_of_idp_pages
type: int
defaultValueNumeric: 0
constraints:
nullable: false
defaultValueNumeric: "0"

View File

@ -191,7 +191,7 @@ public class ComponentOverrideTest extends AbstractPersistenceServerServiceTest
@Test
@SneakyThrows
public void testDeletedFileOverrides() throws IOException {
public void testDeletedFileOverrides() {
var dossier = dossierTesterAndProvider.provideTestDossier();

View File

@ -149,7 +149,10 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
TypeResponse types = dictionaryClient.getAllTypes(dossierTemplate.getId(), null, true);
List<TypeValue> systemManagedTypes = types.getTypes().stream().filter(TypeValue::isSystemManaged).collect(Collectors.toList());
List<TypeValue> systemManagedTypes = types.getTypes()
.stream()
.filter(TypeValue::isSystemManaged)
.collect(Collectors.toList());
assertThat(systemManagedTypes.size()).isEqualTo(8);
var allTemplates = dossierTemplateClient.getAllDossierTemplates();
@ -284,17 +287,17 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
dictionaryClient.getDictionaryForType(type2.getType(), type2.getDossierTemplateId(), dossier.getId());
var allTypes = dictionaryClient.getAllTypes(dossierTemplate.getId(), dossier.getId(), false).getTypes();
assertThat(allTypes
.stream().filter(t -> !t.isSystemManaged()).collect(Collectors.toList())
.size()).isEqualTo(4);
assertThat(allTypes.stream()
.filter(t -> !t.isSystemManaged())
.count()).isEqualTo(4);
var typesWithRankOfType1 = allTypes.stream()
.filter(t -> t.getRank() == type.getRank())
.collect(Collectors.toList());
.toList();
assertThat(typesWithRankOfType1.size()).isEqualTo(2);
var typesWithRankOfType2 = allTypes.stream()
.filter(t -> t.getRank() == type2.getRank())
.collect(Collectors.toList());
.toList();
assertThat(typesWithRankOfType2.size()).isEqualTo(2);
dictionaryClient.addEntry(createdType1.getType(), createdType1.getDossierTemplateId(), List.of("entry1", "entry2"), false, null, DictionaryEntryType.ENTRY);
@ -596,7 +599,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
.build());
// add new justifications
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason","technicalReason")), dossierTemplate.getId());
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason", "technicalReason")), dossierTemplate.getId());
existingLegalBasis = legalBasisClient.getLegalBasisMapping(dossierTemplate.getId());
assertThat(existingLegalBasis.size()).isEqualTo(1);

View File

@ -201,6 +201,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(),
true,
false,
false,
new FileStatusFilter(null, null, true, true)));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);
@ -247,6 +248,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(),
true,
false,
false,
null));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);

View File

@ -10,21 +10,22 @@ import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
@Getter
@Builder
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public final class ReanalysisSettings {
@Getter
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]")
Set<String> dossierIds;
@Getter
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]")
Set<String> fileIds;
@Getter
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false")
boolean repeatStructureAnalysis;
@Schema(description = "If set to true, OCR will be repeated.", defaultValue = "false")
boolean runOcr;
@Schema(description = "If set to true, OCR with IDP will be repeated.", defaultValue = "false")
boolean runIdp;
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "null")
FileStatusFilter fileStatusFilter;