diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index ed55ff17..496f28f3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -17,6 +17,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestBody; @@ -56,7 +57,9 @@ public class RedactionController implements RedactionResource { var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getProjectId(), annotateRequest.getFileId()); var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getProjectId(), annotateRequest.getFileId()); - try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { + try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) { + pdDocument.setAllSecurityToBeRemoved(true); + pdDocument.setResourceCache(null); pdDocument.setAllSecurityToBeRemoved(true); dictionaryService.updateDictionary(redactionLog.getRuleSetId()); @@ -77,8 +80,9 @@ public class RedactionController implements RedactionResource { public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) { var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN)); - try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { + try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) { pdDocument.setAllSecurityToBeRemoved(true); + pdDocument.setResourceCache(null); Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); pdfVisualisationService.visualizeClassifications(classifiedDoc, pdDocument); @@ -91,7 +95,6 @@ public class RedactionController implements RedactionResource { } - @Override public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) { var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN)); @@ -166,4 +169,5 @@ public class RedactionController implements RedactionResource { } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java index e25cf0ff..152dd84d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java @@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationSer import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; import org.kie.api.runtime.KieContainer; import org.springframework.stereotype.Service; @@ -44,8 +45,10 @@ public class ReanalyzeService { var pageCount = 0; Document classifiedDoc; - try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { + try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) { pdDocument.setAllSecurityToBeRemoved(true); + pdDocument.setResourceCache(null); + pageCount = pdDocument.getNumberOfPages(); classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); } catch (Exception e) { @@ -55,7 +58,6 @@ public class ReanalyzeService { imageClassificationService.classifyImages(classifiedDoc); entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions()); - imageClassificationService.classifyImages(classifiedDoc); redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest .getRuleSetId()); @@ -65,6 +67,8 @@ public class ReanalyzeService { var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc .getRulesVersion(), analyzeRequest.getRuleSetId()); + log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc.getDictionaryVersion(), analyzeRequest.getRuleSetId()); + // first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to loose changeLog data var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog); // store redactionLog