RED-3334 Added dropIndex parameter to reindex method

This commit is contained in:
Philipp Schramm 2022-03-14 09:49:04 +01:00
parent f695a2446d
commit 133a523f60
7 changed files with 138 additions and 77 deletions

View File

@ -1,10 +1,14 @@
package com.iqser.red.service.persistence.service.v1.api.resources;
import java.util.Set;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.*;
import java.util.Set;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseStatus;
import com.iqser.red.service.pdftron.redaction.v1.api.model.DocumentRequest;
import com.iqser.red.service.pdftron.redaction.v1.api.model.TextHighlightRequest;
@ -27,8 +31,7 @@ public interface ReanalysisResource {
@PostMapping(value = REANALYZE_PATH + DOSSIER_ID_PATH_PARAM)
void reanalyzeDossier(@PathVariable(DOSSIER_ID_PARAM) String dossierId,
@RequestParam(value = "force", required = false, defaultValue = FALSE) boolean force);
void reanalyzeDossier(@PathVariable(DOSSIER_ID_PARAM) String dossierId, @RequestParam(value = "force", required = false, defaultValue = FALSE) boolean force);
@PostMapping(value = REANALYZE_PATH + DOSSIER_ID_PATH_PARAM + BULK_REST_PATH)
@ -46,16 +49,18 @@ public interface ReanalysisResource {
@PostMapping(value = OCR_REANALYZE_PATH + DOSSIER_ID_PATH_PARAM + BULK_REST_PATH)
void ocrFiles(@PathVariable(DOSSIER_ID_PARAM) String dossierId,
@RequestBody Set<String> fileIds);
void ocrFiles(@PathVariable(DOSSIER_ID_PARAM) String dossierId, @RequestBody Set<String> fileIds);
@PostMapping(value = REINDEX_PATH)
void reindex(@RequestParam(value = DOSSIER_ID_PARAM, required = false) String dossierId, @RequestBody Set<String> fileIds);
void reindex(@RequestParam(value = DOSSIER_ID_PARAM, required = false) String dossierId,
@RequestParam(value = "dropIndex", required = false, defaultValue = FALSE) boolean dropIndex, @RequestBody Set<String> fileIds);
@PostMapping(value = IMPORT_REDACTIONS_PATH, consumes = MediaType.APPLICATION_JSON_VALUE)
void importRedactions(@RequestBody DocumentRequest documentRequest);
@ResponseStatus(value = HttpStatus.OK)
@PostMapping(value = TEXT_HIGHLIGHT_CONVERSION_PATH, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
TextHighlightResponse processTextHighlights(@RequestBody TextHighlightRequest textHighlightRequest);

View File

@ -1,7 +1,25 @@
package com.iqser.red.service.peristence.v1.server.controller;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import com.google.common.collect.Sets;
import com.iqser.red.service.pdftron.redaction.v1.api.model.*;
import com.iqser.red.service.pdftron.redaction.v1.api.model.DocumentRequest;
import com.iqser.red.service.pdftron.redaction.v1.api.model.PdfTronOptimizeRequest;
import com.iqser.red.service.pdftron.redaction.v1.api.model.PdfTronOptimizeResponse;
import com.iqser.red.service.pdftron.redaction.v1.api.model.TextHighlightOperation;
import com.iqser.red.service.pdftron.redaction.v1.api.model.TextHighlightRequest;
import com.iqser.red.service.pdftron.redaction.v1.api.model.TextHighlightResponse;
import com.iqser.red.service.peristence.v1.server.service.FileStatusService;
import com.iqser.red.service.peristence.v1.server.service.IndexingService;
import com.iqser.red.service.peristence.v1.server.service.ReanalysisRequiredStatusService;
@ -13,22 +31,9 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.do
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.WorkflowStatus;
import com.iqser.red.service.persistence.service.v1.api.resources.ReanalysisResource;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import static com.iqser.red.service.persistence.management.v1.processor.utils.MagicConverter.convert;
@Slf4j
@RestController
@ -43,8 +48,7 @@ public class ReanalysisController implements ReanalysisResource {
@Override
public void reanalyzeDossier(@PathVariable(DOSSIER_ID_PARAM) String dossierId,
@RequestParam(value = "force", required = false, defaultValue = FALSE) boolean force) {
public void reanalyzeDossier(@PathVariable(DOSSIER_ID_PARAM) String dossierId, @RequestParam(value = "force", required = false, defaultValue = FALSE) boolean force) {
var relevantFiles = getAllFilesForDossier(dossierId, true);
reanalyseFiles(dossierId, force, relevantFiles);
@ -64,7 +68,9 @@ public class ReanalysisController implements ReanalysisResource {
var relevantFiles = getAllFilesForDossier(dossierId, true);
relevantFiles.stream().filter(fileStatus -> fileStatus.getLastOCRTime() == null).forEach(fileStatus -> fileStatusService.setStatusOcrProcessing(dossierId, fileStatus.getId()));
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getLastOCRTime() == null)
.forEach(fileStatus -> fileStatusService.setStatusOcrProcessing(dossierId, fileStatus.getId()));
}
@ -72,23 +78,27 @@ public class ReanalysisController implements ReanalysisResource {
var relevantFiles = getRelevantFiles(dossierId, fileIds);
relevantFiles.stream().filter(fileStatus -> fileStatus.getLastOCRTime() == null).forEach(fileStatus -> fileStatusService.setStatusOcrProcessing(dossierId, fileStatus.getId()));
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getLastOCRTime() == null)
.forEach(fileStatus -> fileStatusService.setStatusOcrProcessing(dossierId, fileStatus.getId()));
}
public void reindex(@RequestParam(value = DOSSIER_ID_PARAM, required = false) String dossierId, @RequestBody Set<String> fileIds) {
indexingService.reindex(dossierId, fileIds);
public void reindex(@RequestParam(value = DOSSIER_ID_PARAM, required = false) String dossierId,
@RequestParam(value = "dropIndex", required = false, defaultValue = FALSE) boolean dropIndex, @RequestBody Set<String> fileIds) {
indexingService.reindex(dossierId, fileIds, dropIndex);
}
public void importRedactions(@RequestBody DocumentRequest documentRequest){
public void importRedactions(@RequestBody DocumentRequest documentRequest) {
// validate the pdf file
PdfTronOptimizeRequest request = new PdfTronOptimizeRequest();
request.setDocument(documentRequest.getDocument());
PdfTronOptimizeResponse optimize;
try {
optimize = pDFTronRedactionClient.optimize(request);
}catch (Exception e){
} catch (Exception e) {
throw new BadRequestException("File Not Valid PDF");
}
FileModel file = fileStatusService.getStatus(documentRequest.getFileId());
@ -99,13 +109,15 @@ public class ReanalysisController implements ReanalysisResource {
fileStatusService.setStatusFullReprocess(documentRequest.getDossierId(), documentRequest.getFileId(), 1);
}
public TextHighlightResponse processTextHighlights(@RequestBody TextHighlightRequest textHighlightRequest){
public TextHighlightResponse processTextHighlights(@RequestBody TextHighlightRequest textHighlightRequest) {
var textHighlightResponse = pDFTronRedactionClient.processTextHighlights(textHighlightRequest);
if(textHighlightRequest.getOperation().equals(TextHighlightOperation.REMOVE) || textHighlightRequest.getOperation().equals(TextHighlightOperation.CONVERT)){
if (textHighlightRequest.getOperation().equals(TextHighlightOperation.REMOVE) || textHighlightRequest.getOperation().equals(TextHighlightOperation.CONVERT)) {
fileStatusService.updateFileModificationDate(textHighlightRequest.getFileId(), OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
}
if(textHighlightRequest.getOperation().equals(TextHighlightOperation.CONVERT)){
if (textHighlightRequest.getOperation().equals(TextHighlightOperation.CONVERT)) {
fileStatusService.setStatusFullReprocess(textHighlightRequest.getDossierId(), textHighlightRequest.getFileId(), 1);
}
return textHighlightResponse;
@ -189,5 +201,4 @@ public class ReanalysisController implements ReanalysisResource {
}
}

View File

@ -0,0 +1,36 @@
package com.iqser.red.service.peristence.v1.server.migration.index;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.ApplicationContext;
import org.springframework.context.event.EventListener;
import org.springframework.stereotype.Service;
import com.iqser.red.service.peristence.v1.server.service.IndexingService;
import com.iqser.red.service.peristence.v1.server.settings.FileManagementServiceSettings;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Startup hook that optionally triggers a full "drop index and reindex" run.
 *
 * <p>When {@link FileManagementServiceSettings#isDropIndexAndReindexFiles()} returns
 * {@code true}, the listener asks the {@link IndexingService} for a reindex with the
 * {@code dropIndex} flag set and then terminates the application with exit code 0.
 * When the setting is {@code false} the listener does nothing.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class MigrateIndexServiceStarter {

    // NOTE: field order determines the Lombok-generated constructor signature; keep it stable.
    private final IndexingService indexingService;
    private final FileManagementServiceSettings settings;
    private final ApplicationContext ctx;


    /**
     * Invoked once the application has published {@link ApplicationReadyEvent}.
     *
     * <p>If the drop-and-reindex setting is enabled, requests a reindex without a dossier
     * or file restriction (both arguments {@code null}) and with {@code dropIndex = true},
     * then closes the Spring context and exits the JVM with the resulting exit code.
     */
    @EventListener(ApplicationReadyEvent.class)
    public void reindexFiles() {

        // Guard clause: nothing to do unless the run was explicitly requested via settings.
        if (!settings.isDropIndexAndReindexFiles()) {
            return;
        }

        log.info("Will call SearchService via queue to close, drop, recreate index and reindex all files");
        indexingService.reindex(null, null, true);

        // Shut the context down first, then propagate its exit code (0 from the generator) to the JVM.
        int exitCode = SpringApplication.exit(ctx, () -> 0);
        System.exit(exitCode);
    }

}

View File

@ -1,5 +1,14 @@
package com.iqser.red.service.peristence.v1.server.service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.peristence.v1.server.configuration.MessagingConfiguration;
@ -10,15 +19,8 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.do
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.search.v1.model.IndexMessage;
import com.iqser.red.service.search.v1.model.IndexMessageType;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
@ -29,33 +31,38 @@ public class IndexingService {
private final DossierService dossierService;
private final FileStatusPersistenceService fileStatusPersistenceService;
public void reindex(String dossierId, Set<String> fileIds) {
List<Pair<String, String>> reindexDossierIds = new ArrayList<>();
if (dossierId == null) {
List<DossierEntity> dossiers = dossierService.getAllDossiers();
for (DossierEntity dossier : dossiers) {
if (dossier.getStatus().equals(DossierStatus.ACTIVE)) {
public void reindex(String dossierId, Set<String> fileIds, boolean dropIndex) {
if (dropIndex) {
addToIndexingQueue(IndexMessageType.DROP, null, null, null, 2);
} else {
List<Pair<String, String>> reindexDossierIds = new ArrayList<>();
if (dossierId == null) {
List<DossierEntity> dossiers = dossierService.getAllDossiers();
for (DossierEntity dossier : dossiers) {
if (dossier.getStatus().equals(DossierStatus.ACTIVE) || dossier.getStatus().equals(DossierStatus.ARCHIVED)) {
reindexDossierIds.add(new ImmutablePair<>(dossier.getDossierTemplateId(), dossier.getId()));
}
}
} else {
DossierEntity dossier = dossierService.getDossierById(dossierId);
if (dossier.getStatus().equals(DossierStatus.ACTIVE) || dossier.getStatus().equals(DossierStatus.ARCHIVED)) {
reindexDossierIds.add(new ImmutablePair<>(dossier.getDossierTemplateId(), dossier.getId()));
}
}
} else {
DossierEntity dossier = dossierService.getDossierById(dossierId);
if (dossier.getStatus().equals(DossierStatus.ACTIVE)) {
reindexDossierIds.add(new ImmutablePair<>(dossier.getDossierTemplateId(), dossier.getId()));
}
}
for (Pair<String, String> reindexDossierId : reindexDossierIds) {
List<FileEntity> fileStatuses = fileStatusPersistenceService.getStatusesForDossier(reindexDossierId.getRight());
for (FileEntity fileStatus : fileStatuses) {
if (fileStatus.getProcessingStatus().equals(ProcessingStatus.DELETED)) {
continue;
for (Pair<String, String> reindexDossierId : reindexDossierIds) {
List<FileEntity> fileStatuses = fileStatusPersistenceService.getStatusesForDossier(reindexDossierId.getRight());
for (FileEntity fileStatus : fileStatuses) {
if (fileStatus.getProcessingStatus().equals(ProcessingStatus.DELETED)) {
continue;
}
if (fileIds != null && !fileIds.isEmpty() && !fileIds.contains(fileStatus.getId())) {
continue;
}
addToIndexingQueue(IndexMessageType.INSERT, reindexDossierId.getLeft(), reindexDossierId.getRight(), fileStatus.getId(), 2);
}
if (fileIds != null && !fileIds.isEmpty() && !fileIds.contains(fileStatus.getId())) {
continue;
}
addToIndexingQueue(IndexMessageType.INSERT, reindexDossierId.getLeft(), reindexDossierId.getRight(), fileStatus.getId(), 2);
}
}
}

View File

@ -21,7 +21,7 @@ public class FileManagementServiceSettings {
private int softDeleteCleanupTime = 96;
private boolean migrateOnly;
private boolean dropIndexAndReindexFiles;
private boolean imageServiceEnabled = true;
private boolean nerServiceEnabled = true;

View File

@ -1,5 +1,17 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static org.assertj.core.api.Assertions.assertThat;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.junit.Before;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import com.iqser.red.service.peristence.v1.server.integration.client.DossierStatsClient;
import com.iqser.red.service.peristence.v1.server.integration.client.FileClient;
import com.iqser.red.service.peristence.v1.server.integration.service.DossierTemplateTesterAndProvider;
@ -10,21 +22,11 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.do
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.DossierStats;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.WorkflowStatus;
import org.junit.Before;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import java.time.OffsetDateTime;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.assertj.core.api.Assertions.assertThat;
public class DossierStatsTest extends AbstractPersistenceServerServiceTest {
private static final int NUMBER_PAGES_ANALYZED = 5;
private static final OffsetDateTime NOW = OffsetDateTime.now();
private static final OffsetDateTime NOW = OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
private static final OffsetDateTime OLDER_1 = NOW.minusSeconds(5);
private static final OffsetDateTime OLDER_2 = NOW.minusSeconds(15);

View File

@ -26,7 +26,7 @@
<properties>
<redaction-service.version>3.76.0</redaction-service.version>
<search-service.version>2.18.0</search-service.version>
<search-service.version>2.26.0</search-service.version>
<pdftron-redaction-service.version>3.44.0</pdftron-redaction-service.version>
<redaction-report-service.version>3.19.0</redaction-report-service.version>
</properties>