attempted fix for image classification

This commit is contained in:
Timo 2021-04-16 20:53:09 +03:00
parent 93d75e2f1c
commit 4749858e80
2 changed files with 13 additions and 5 deletions

View File

@ -17,6 +17,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
@ -56,7 +57,9 @@ public class RedactionController implements RedactionResource {
var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getProjectId(), annotateRequest.getFileId());
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getProjectId(), annotateRequest.getFileId());
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
pdDocument.setAllSecurityToBeRemoved(true);
pdDocument.setResourceCache(null);
pdDocument.setAllSecurityToBeRemoved(true);
dictionaryService.updateDictionary(redactionLog.getRuleSetId());
@ -77,8 +80,9 @@ public class RedactionController implements RedactionResource {
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
pdDocument.setAllSecurityToBeRemoved(true);
pdDocument.setResourceCache(null);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
pdfVisualisationService.visualizeClassifications(classifiedDoc, pdDocument);
@ -91,7 +95,6 @@ public class RedactionController implements RedactionResource {
}
@Override
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
@ -166,4 +169,5 @@ public class RedactionController implements RedactionResource {
}
}

View File

@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationSer
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
@ -44,8 +45,10 @@ public class ReanalyzeService {
var pageCount = 0;
Document classifiedDoc;
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
pdDocument.setAllSecurityToBeRemoved(true);
pdDocument.setResourceCache(null);
pageCount = pdDocument.getNumberOfPages();
classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
} catch (Exception e) {
@ -55,7 +58,6 @@ public class ReanalyzeService {
imageClassificationService.classifyImages(classifiedDoc);
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
imageClassificationService.classifyImages(classifiedDoc);
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest
.getRuleSetId());
@ -65,6 +67,8 @@ public class ReanalyzeService {
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc
.getRulesVersion(), analyzeRequest.getRuleSetId());
log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc.getDictionaryVersion(), analyzeRequest.getRuleSetId());
// first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to lose changeLog data
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
// store redactionLog