From 0cb127eccbf496bcbbb17aa8b4de51c13aa9d3ea Mon Sep 17 00:00:00 2001 From: deiflaender Date: Mon, 7 Feb 2022 13:00:54 +0100 Subject: [PATCH] RED-3363: Fixed ignored hints in full analysis --- redaction-service-image-v1/pom.xml | 1 + redaction-service-v1/pom.xml | 1 + .../service/EntityRedactionService.java | 83 ++++++++++++++++++- .../v1/server/RedactionIntegrationTest.java | 35 ++++++++ 4 files changed, 116 insertions(+), 4 deletions(-) diff --git a/redaction-service-image-v1/pom.xml b/redaction-service-image-v1/pom.xml index b4273c35..dcd5c72e 100644 --- a/redaction-service-image-v1/pom.xml +++ b/redaction-service-image-v1/pom.xml @@ -6,6 +6,7 @@ com.iqser.red platform-docker-dependency 1.0.0 + 4.0.0 diff --git a/redaction-service-v1/pom.xml b/redaction-service-v1/pom.xml index acad7a9b..b2f67fca 100644 --- a/redaction-service-v1/pom.xml +++ b/redaction-service-v1/pom.xml @@ -6,6 +6,7 @@ platform-dependency com.iqser.red 1.1.3.3 + 4.0.0 diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index c1dc3f11..31372409 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -105,9 +105,9 @@ public class EntityRedactionService { List tables = paragraph.getTables(); for (Table table : tables) { if (table.getColCount() == 2) { - sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes)); + sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes, manualRedactions)); } else { - sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes)); + sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes, manualRedactions)); } sectionNumber.incrementAndGet(); } @@ -178,7 +178,7 @@ public class EntityRedactionService { AtomicInteger sectionNumber, Dictionary dictionary, boolean local, Map> hintsPerSectionNumber, - List fileAttributes) { + List fileAttributes, ManualRedactions manualRedactions) { List sectionSearchableTextPairs = new ArrayList<>(); @@ -229,6 +229,31 @@ public class EntityRedactionService { Set rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber.intValue(), dictionary, local); surroundingWordsService.addSurroundingText(rowEntities, searchableRow, dictionary, cellStarts); + if (!local && manualRedactions != null) { + + + var approvedForceRedactions = manualRedactions.getForceRedacts().stream() + .filter(fr -> fr.getStatus() == Status.APPROVED) + .filter(fr -> fr.getRequestDate() != null) + .collect(Collectors.toList()); + // only approved id removals, that haven't been forced back afterwards + var idsToRemove = manualRedactions.getIdsToRemove().stream() + .filter(idr -> idr.getStatus() == Status.APPROVED) + .filter(idr -> idr.getRequestDate() != null) + .filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate()))) + .map(IdRemoval::getId).collect(Collectors.toSet()); + + log.info("Removed Ids: {}", idsToRemove); + + rowEntities.forEach(entity -> + entity.getPositionSequences().forEach(ps -> { + if (idsToRemove.contains(ps.getId())) { + entity.setIgnored(true); + } + })); + + } + sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder() .isLocal(local) .dictionaryTypes(dictionary.getTypes()) @@ -266,7 +291,7 @@ public class EntityRedactionService { AtomicInteger sectionNumber, Dictionary dictionary, boolean local, Map> hintsPerSectionNumber, - List fileAttributes) { + List fileAttributes, ManualRedactions manualRedactions) { List sectionSearchableTextPairs = new ArrayList<>(); SearchableText entireTableText = new SearchableText(); @@ -297,6 +322,31 @@ public class EntityRedactionService { Set rowEntities = findEntities(entireTableText, table.getHeadline(), sectionNumber.intValue(), dictionary, local); surroundingWordsService.addSurroundingText(rowEntities, entireTableText, dictionary); + if (!local && manualRedactions != null) { + + + var approvedForceRedactions = manualRedactions.getForceRedacts().stream() + .filter(fr -> fr.getStatus() == Status.APPROVED) + .filter(fr -> fr.getRequestDate() != null) + .collect(Collectors.toList()); + // only approved id removals, that haven't been forced back afterwards + var idsToRemove = manualRedactions.getIdsToRemove().stream() + .filter(idr -> idr.getStatus() == Status.APPROVED) + .filter(idr -> idr.getRequestDate() != null) + .filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate()))) + .map(IdRemoval::getId).collect(Collectors.toSet()); + + log.info("Removed Ids: {}", idsToRemove); + + rowEntities.forEach(entity -> + entity.getPositionSequences().forEach(ps -> { + if (idsToRemove.contains(ps.getId())) { + entity.setIgnored(true); + } + })); + + } + sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder() .isLocal(local) .dictionaryTypes(dictionary.getTypes()) @@ -353,6 +403,31 @@ public class EntityRedactionService { Set entities = findEntities(searchableText, headline, sectionNumber.intValue(), dictionary, local); surroundingWordsService.addSurroundingText(entities, searchableText, dictionary); + if (!local && manualRedactions != null) { + + + var approvedForceRedactions = manualRedactions.getForceRedacts().stream() + .filter(fr -> fr.getStatus() == Status.APPROVED) + .filter(fr -> fr.getRequestDate() != null) + .collect(Collectors.toList()); + // only approved id removals, that haven't been forced back afterwards + var idsToRemove = manualRedactions.getIdsToRemove().stream() + .filter(idr -> idr.getStatus() == Status.APPROVED) + .filter(idr -> idr.getRequestDate() != null) + .filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate()))) + .map(IdRemoval::getId).collect(Collectors.toSet()); + + log.info("Removed Ids: {}", idsToRemove); + + entities.forEach(entity -> + entity.getPositionSequences().forEach(ps -> { + if (idsToRemove.contains(ps.getId())) { + entity.setIgnored(true); + } + })); + + } + return new SectionSearchableTextPair(Section.builder() .isLocal(local) .dictionaryTypes(dictionary.getTypes()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 59781278..bb49fdc1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -811,6 +811,41 @@ public class RedactionIntegrationTest { assertThat(cbiAddressAfterHintRemoval.isRedacted()).isTrue(); } + + + @Test + public void testIgnoreHintFirstAnalysis() { + + System.out.println("testIgnoreHintFirstAnalysis"); + + AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf"); + + var manualRedactions = ManualRedactions.builder().idsToRemove(Sets.newLinkedHashSet( + IdRemoval.builder() + .id("c630599611e6e3db314518374bcf70f7") + .status(Status.APPROVED) + .user("test") + .removeFromDictionary(false) + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .build())).build(); + + request.setManualRedactions(manualRedactions); + reanalyzeService.analyze(request); + + var mergedRedactionLog = redactionController.getRedactionLog(RedactionRequest.builder().withSectionDataForManualRedactions(true) + .manualRedactions(manualRedactions) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .build()); + + var cbiAddressAfterHintRemoval = mergedRedactionLog.getRedactionLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get(); + assertThat(cbiAddressAfterHintRemoval.isRedacted()).isTrue(); + } + + + @Test public void testTableRedaction() throws IOException {