Pull request #340: RED-3363: Fixed ignored hints in full analysis

Merge in RED/redaction-service from RED-3363 to release/2.52.x

* commit '0cb127eccbf496bcbbb17aa8b4de51c13aa9d3ea':
  RED-3363: Fixed ignored hints in full analysis
This commit is contained in:
Dominique Eiflaender 2022-02-07 13:27:54 +01:00
commit 3e1e7bd3f7
4 changed files with 116 additions and 4 deletions

View File

@ -6,6 +6,7 @@
<groupId>com.iqser.red</groupId>
<artifactId>platform-docker-dependency</artifactId>
<version>1.0.0</version>
<relativePath />
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -6,6 +6,7 @@
<artifactId>platform-dependency</artifactId>
<groupId>com.iqser.red</groupId>
<version>1.1.3.3</version>
<relativePath />
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -105,9 +105,9 @@ public class EntityRedactionService {
List<Table> tables = paragraph.getTables();
for (Table table : tables) {
if (table.getColCount() == 2) {
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes, manualRedactions));
} else {
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes, manualRedactions));
}
sectionNumber.incrementAndGet();
}
@ -178,7 +178,7 @@ public class EntityRedactionService {
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
List<FileAttribute> fileAttributes) {
List<FileAttribute> fileAttributes, ManualRedactions manualRedactions) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
@ -229,6 +229,31 @@ public class EntityRedactionService {
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
surroundingWordsService.addSurroundingText(rowEntities, searchableRow, dictionary, cellStarts);
if (!local && manualRedactions != null) {
var approvedForceRedactions = manualRedactions.getForceRedacts().stream()
.filter(fr -> fr.getStatus() == Status.APPROVED)
.filter(fr -> fr.getRequestDate() != null)
.collect(Collectors.toList());
// Keep only approved ID removals for which no approved force redaction was requested afterwards
var idsToRemove = manualRedactions.getIdsToRemove().stream()
.filter(idr -> idr.getStatus() == Status.APPROVED)
.filter(idr -> idr.getRequestDate() != null)
.filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate())))
.map(IdRemoval::getId).collect(Collectors.toSet());
log.info("Removed Ids: {}", idsToRemove);
rowEntities.forEach(entity ->
entity.getPositionSequences().forEach(ps -> {
if (idsToRemove.contains(ps.getId())) {
entity.setIgnored(true);
}
}));
}
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(local)
.dictionaryTypes(dictionary.getTypes())
@ -266,7 +291,7 @@ public class EntityRedactionService {
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
List<FileAttribute> fileAttributes) {
List<FileAttribute> fileAttributes, ManualRedactions manualRedactions) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
SearchableText entireTableText = new SearchableText();
@ -297,6 +322,31 @@ public class EntityRedactionService {
Set<Entity> rowEntities = findEntities(entireTableText, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
surroundingWordsService.addSurroundingText(rowEntities, entireTableText, dictionary);
if (!local && manualRedactions != null) {
var approvedForceRedactions = manualRedactions.getForceRedacts().stream()
.filter(fr -> fr.getStatus() == Status.APPROVED)
.filter(fr -> fr.getRequestDate() != null)
.collect(Collectors.toList());
// Keep only approved ID removals for which no approved force redaction was requested afterwards
var idsToRemove = manualRedactions.getIdsToRemove().stream()
.filter(idr -> idr.getStatus() == Status.APPROVED)
.filter(idr -> idr.getRequestDate() != null)
.filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate())))
.map(IdRemoval::getId).collect(Collectors.toSet());
log.info("Removed Ids: {}", idsToRemove);
rowEntities.forEach(entity ->
entity.getPositionSequences().forEach(ps -> {
if (idsToRemove.contains(ps.getId())) {
entity.setIgnored(true);
}
}));
}
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(local)
.dictionaryTypes(dictionary.getTypes())
@ -353,6 +403,31 @@ public class EntityRedactionService {
Set<Entity> entities = findEntities(searchableText, headline, sectionNumber.intValue(), dictionary, local);
surroundingWordsService.addSurroundingText(entities, searchableText, dictionary);
if (!local && manualRedactions != null) {
var approvedForceRedactions = manualRedactions.getForceRedacts().stream()
.filter(fr -> fr.getStatus() == Status.APPROVED)
.filter(fr -> fr.getRequestDate() != null)
.collect(Collectors.toList());
// Keep only approved ID removals for which no approved force redaction was requested afterwards
var idsToRemove = manualRedactions.getIdsToRemove().stream()
.filter(idr -> idr.getStatus() == Status.APPROVED)
.filter(idr -> idr.getRequestDate() != null)
.filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate())))
.map(IdRemoval::getId).collect(Collectors.toSet());
log.info("Removed Ids: {}", idsToRemove);
entities.forEach(entity ->
entity.getPositionSequences().forEach(ps -> {
if (idsToRemove.contains(ps.getId())) {
entity.setIgnored(true);
}
}));
}
return new SectionSearchableTextPair(Section.builder()
.isLocal(local)
.dictionaryTypes(dictionary.getTypes())

View File

@ -811,6 +811,41 @@ public class RedactionIntegrationTest {
assertThat(cbiAddressAfterHintRemoval.isRedacted()).isTrue();
}
/**
 * Runs a first (full) analysis of {@code files/new/test-ignore-hint.pdf} with an approved
 * {@link IdRemoval} already attached to the request, then fetches the merged redaction log
 * and checks that a {@code CBI_Address} entry is present and redacted.
 *
 * <p>NOTE(review): the removed id presumably belongs to a hint in the test PDF (as the test
 * name suggests) - confirm against the fixture.
 */
@Test
public void testIgnoreHintFirstAnalysis() {

    System.out.println("testIgnoreHintFirstAnalysis");
    AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf");

    // The removal is attached BEFORE the first analysis, so it has to be handled by the
    // full analysis itself rather than by a later re-analysis.
    var manualRedactions = ManualRedactions.builder().idsToRemove(Sets.newLinkedHashSet(
            IdRemoval.builder()
                    .id("c630599611e6e3db314518374bcf70f7")
                    .status(Status.APPROVED)
                    .user("test")
                    .removeFromDictionary(false)
                    .processedDate(OffsetDateTime.now())
                    .requestDate(OffsetDateTime.now())
                    .build())).build();
    request.setManualRedactions(manualRedactions);

    reanalyzeService.analyze(request);

    var mergedRedactionLog = redactionController.getRedactionLog(RedactionRequest.builder()
            .withSectionDataForManualRedactions(true)
            .manualRedactions(manualRedactions)
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .build());

    // Fail with a descriptive message instead of a bare NoSuchElementException from
    // Optional.get() when the log does not contain any CBI_Address entry.
    var cbiAddressAfterHintRemoval = mergedRedactionLog.getRedactionLogEntry().stream()
            .filter(re -> re.getType().equalsIgnoreCase("CBI_Address"))
            .findAny()
            .orElseThrow(() -> new AssertionError("Expected a CBI_Address entry in the merged redaction log"));

    assertThat(cbiAddressAfterHintRemoval.isRedacted()).isTrue();
}
@Test
public void testTableRedaction() throws IOException {