diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java index 3879bb80..b19ce7ba 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java @@ -1,5 +1,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import static java.util.stream.Collectors.toSet; + import lombok.Data; import lombok.Getter; @@ -8,6 +10,8 @@ import java.util.List; import java.util.Map; import java.util.Set; +import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.DictionaryEntry; + @Data public class Dictionary { @@ -61,11 +65,11 @@ public class Dictionary { public boolean containsValue(String type, String value) { return localAccessMap.containsKey(type) && localAccessMap.get(type) - .getEntries() + .getEntries().stream().map(DictionaryEntry::getValue).collect(toSet()) .contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type) .getLocalEntries() .contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type) - .getEntries() + .getEntries().stream().map(DictionaryEntry::getValue).collect(toSet()) .contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type) .getLocalEntries() .contains(value); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index a8dd3f88..9c42aebd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -89,7 +89,7 @@ public class EntityRedactionService { String imageId = IdBuilder.buildId(image.getPosition(), image.getPage()); for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions() .getImageRecategorization()) { - if (imageRecategorization.getStatus().equals(AnnotationStatus.APPROVED) && imageRecategorization.getId() + if (imageRecategorization.getStatus().equals(AnnotationStatus.APPROVED) && imageRecategorization.getId().getId() .equals(imageId)) { image.setType(imageRecategorization.getType()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java index d45eb959..55584a7a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java @@ -240,7 +240,7 @@ public class RedactionLogMergeService { RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, manualRedactionEntry .getId().getId(), dossierTemplateId); 
redactionLogEntry.setPositions(convertPositions(manualRedactionEntry.getPositions())); - redactionLogEntry.setComments(comments.get(manualRedactionEntry.getId())); + redactionLogEntry.setComments(comments.get(manualRedactionEntry.getId().getId())); redactionLogEntries.add(redactionLogEntry); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 1cd9d6d0..84b76a4e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -140,7 +140,12 @@ public class EntitySearchUtils { public void addEntitiesWithHigherRank(Set entities, Entity found, Dictionary dictionary) { if (entities.contains(found)) { - Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get(); + Optional<Entity> existingOptional = entities.stream().filter(entity -> entity.equals(found)).findFirst(); + if(!existingOptional.isPresent()){ + return; + } + var existing = existingOptional.get(); + if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) { entities.remove(found); entities.add(found); @@ -164,7 +169,11 @@ public class EntitySearchUtils { for(Entity toAdd: toBeAdded){ if (existing.contains(toAdd)) { - Entity existingEntity = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst().get(); + Optional<Entity> existingOptional = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst(); + if(!existingOptional.isPresent()){ + continue; /* skip only this entry; the remaining toBeAdded entities must still be merged */ + } + var existingEntity = existingOptional.get(); 
existingEntity.getEngines().addAll(toAdd.getEngines()); } else { existing.add(toAdd); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index 22643b0b..fccbff42 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -60,76 +60,80 @@ public class PdfSegmentationService { try { //create tempFile File tempFile = File.createTempFile("document", ".pdf"); - IOUtils.copy(documentInputStream, new FileOutputStream(tempFile)); + try (var fos = new FileOutputStream(tempFile)) { + IOUtils.copy(documentInputStream, fos); - // initialize required variables - Document document = new Document(); - List pages = new ArrayList<>(); + // initialize required variables + Document document = new Document(); + List pages = new ArrayList<>(); - pdDocument = reinitializePDDocument(tempFile, null); - long pageCount = pdDocument.getNumberOfPages(); + pdDocument = reinitializePDDocument(tempFile, null); + long pageCount = pdDocument.getNumberOfPages(); - for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) { + for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) { - if (pageNumber % MAX_PAGES_BEFORE_GC == 0) { - pdDocument = reinitializePDDocument(tempFile, pdDocument); + if (pageNumber % MAX_PAGES_BEFORE_GC == 0) { + pdDocument = reinitializePDDocument(tempFile, pdDocument); + } + + PDFLinesTextStripper stripper = new PDFLinesTextStripper(); + PDPage pdPage = pdDocument.getPage(pageNumber - 1); + stripper.setPageNumber(pageNumber); + 
stripper.setStartPage(pageNumber); + stripper.setEndPage(pageNumber); + stripper.setPdpage(pdPage); + stripper.getText(pdDocument); + + PDRectangle pdr = pdPage.getMediaBox(); + boolean isLandscape = pdr.getWidth() > pdr.getHeight(); + + int rotation = pdPage.getRotation(); + boolean isRotated = rotation != 0 && rotation != 360; + + CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(stripper.getRulings(), stripper.getMinCharWidth(), stripper + .getMaxCharHeight()); + + Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings + .getVertical()); + + PDRectangle cropbox = pdPage.getCropBox(); + float cropboxArea = cropbox.getHeight() * cropbox.getWidth(); + page.setCropBoxArea(cropboxArea); + + page.setRotation(rotation); + page.setLandscape(isLandscape || isRotated); + page.setPageNumber(pageNumber); + + List mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation); + page.setImages(mergedList); + + tableExtractionService.extractTables(cleanRulings, page); + buildPageStatistics(page); + increaseDocumentStatistics(page, document); + + if (!ignoreImages) { + imageClassificationService.classifyImages(page); + } + + pages.add(page); } - PDFLinesTextStripper stripper = new PDFLinesTextStripper(); - PDPage pdPage = pdDocument.getPage(pageNumber - 1); - stripper.setPageNumber(pageNumber); - stripper.setStartPage(pageNumber); - stripper.setEndPage(pageNumber); - stripper.setPdpage(pdPage); - stripper.getText(pdDocument); + document.setPages(pages); - PDRectangle pdr = pdPage.getMediaBox(); - boolean isLandscape = pdr.getWidth() > pdr.getHeight(); + classificationService.classifyDocument(document); + sectionsBuilderService.buildSections(document); + sectionsBuilderService.addImagesToSections(document); - int rotation = pdPage.getRotation(); - boolean isRotated = rotation != 0 && rotation != 360; + pdDocument = reinitializePDDocument(tempFile, pdDocument); - CleanRulings 
cleanRulings = rulingCleaningService.getCleanRulings(stripper.getRulings(), stripper.getMinCharWidth(), stripper - .getMaxCharHeight()); + IOUtils.close(pdDocument); - Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings - .getVertical()); - - PDRectangle cropbox = pdPage.getCropBox(); - float cropboxArea = cropbox.getHeight() * cropbox.getWidth(); - page.setCropBoxArea(cropboxArea); - - page.setRotation(rotation); - page.setLandscape(isLandscape || isRotated); - page.setPageNumber(pageNumber); - - List mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation); - page.setImages(mergedList); - - tableExtractionService.extractTables(cleanRulings, page); - buildPageStatistics(page); - increaseDocumentStatistics(page, document); - - if (!ignoreImages) { - imageClassificationService.classifyImages(page); + if(!tempFile.delete()){ + log.warn("Could not delete tmp file"); } - pages.add(page); + return document; } - - document.setPages(pages); - - classificationService.classifyDocument(document); - sectionsBuilderService.buildSections(document); - sectionsBuilderService.addImagesToSections(document); - - pdDocument = reinitializePDDocument(tempFile, pdDocument); - - IOUtils.close(pdDocument); - - tempFile.delete(); - - return document; } finally { if (pdDocument != null) { pdDocument.close();