Pull request #340: RED-3363: Fixed ignored hints in full analysis
Merge in RED/redaction-service from RED-3363 to release/2.52.x * commit '0cb127eccbf496bcbbb17aa8b4de51c13aa9d3ea': RED-3363: Fixed ignored hints in full analysis
This commit is contained in:
commit
3e1e7bd3f7
@ -6,6 +6,7 @@
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<artifactId>platform-docker-dependency</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<relativePath />
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
<artifactId>platform-dependency</artifactId>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<version>1.1.3.3</version>
|
||||
<relativePath />
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
||||
@ -105,9 +105,9 @@ public class EntityRedactionService {
|
||||
List<Table> tables = paragraph.getTables();
|
||||
for (Table table : tables) {
|
||||
if (table.getColCount() == 2) {
|
||||
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
|
||||
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes, manualRedactions));
|
||||
} else {
|
||||
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
|
||||
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes, manualRedactions));
|
||||
}
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
@ -178,7 +178,7 @@ public class EntityRedactionService {
|
||||
AtomicInteger sectionNumber, Dictionary dictionary,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
List<FileAttribute> fileAttributes) {
|
||||
List<FileAttribute> fileAttributes, ManualRedactions manualRedactions) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
|
||||
@ -229,6 +229,31 @@ public class EntityRedactionService {
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
|
||||
surroundingWordsService.addSurroundingText(rowEntities, searchableRow, dictionary, cellStarts);
|
||||
|
||||
if (!local && manualRedactions != null) {
|
||||
|
||||
|
||||
var approvedForceRedactions = manualRedactions.getForceRedacts().stream()
|
||||
.filter(fr -> fr.getStatus() == Status.APPROVED)
|
||||
.filter(fr -> fr.getRequestDate() != null)
|
||||
.collect(Collectors.toList());
|
||||
// only approved id removals that haven't been forced back afterwards
|
||||
var idsToRemove = manualRedactions.getIdsToRemove().stream()
|
||||
.filter(idr -> idr.getStatus() == Status.APPROVED)
|
||||
.filter(idr -> idr.getRequestDate() != null)
|
||||
.filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate())))
|
||||
.map(IdRemoval::getId).collect(Collectors.toSet());
|
||||
|
||||
log.info("Removed Ids: {}", idsToRemove);
|
||||
|
||||
rowEntities.forEach(entity ->
|
||||
entity.getPositionSequences().forEach(ps -> {
|
||||
if (idsToRemove.contains(ps.getId())) {
|
||||
entity.setIgnored(true);
|
||||
}
|
||||
}));
|
||||
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
@ -266,7 +291,7 @@ public class EntityRedactionService {
|
||||
AtomicInteger sectionNumber, Dictionary dictionary,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
List<FileAttribute> fileAttributes) {
|
||||
List<FileAttribute> fileAttributes, ManualRedactions manualRedactions) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
SearchableText entireTableText = new SearchableText();
|
||||
@ -297,6 +322,31 @@ public class EntityRedactionService {
|
||||
Set<Entity> rowEntities = findEntities(entireTableText, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
|
||||
surroundingWordsService.addSurroundingText(rowEntities, entireTableText, dictionary);
|
||||
|
||||
if (!local && manualRedactions != null) {
|
||||
|
||||
|
||||
var approvedForceRedactions = manualRedactions.getForceRedacts().stream()
|
||||
.filter(fr -> fr.getStatus() == Status.APPROVED)
|
||||
.filter(fr -> fr.getRequestDate() != null)
|
||||
.collect(Collectors.toList());
|
||||
// only approved id removals that haven't been forced back afterwards
|
||||
var idsToRemove = manualRedactions.getIdsToRemove().stream()
|
||||
.filter(idr -> idr.getStatus() == Status.APPROVED)
|
||||
.filter(idr -> idr.getRequestDate() != null)
|
||||
.filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate())))
|
||||
.map(IdRemoval::getId).collect(Collectors.toSet());
|
||||
|
||||
log.info("Removed Ids: {}", idsToRemove);
|
||||
|
||||
rowEntities.forEach(entity ->
|
||||
entity.getPositionSequences().forEach(ps -> {
|
||||
if (idsToRemove.contains(ps.getId())) {
|
||||
entity.setIgnored(true);
|
||||
}
|
||||
}));
|
||||
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
@ -353,6 +403,31 @@ public class EntityRedactionService {
|
||||
Set<Entity> entities = findEntities(searchableText, headline, sectionNumber.intValue(), dictionary, local);
|
||||
surroundingWordsService.addSurroundingText(entities, searchableText, dictionary);
|
||||
|
||||
if (!local && manualRedactions != null) {
|
||||
|
||||
|
||||
var approvedForceRedactions = manualRedactions.getForceRedacts().stream()
|
||||
.filter(fr -> fr.getStatus() == Status.APPROVED)
|
||||
.filter(fr -> fr.getRequestDate() != null)
|
||||
.collect(Collectors.toList());
|
||||
// only approved id removals that haven't been forced back afterwards
|
||||
var idsToRemove = manualRedactions.getIdsToRemove().stream()
|
||||
.filter(idr -> idr.getStatus() == Status.APPROVED)
|
||||
.filter(idr -> idr.getRequestDate() != null)
|
||||
.filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate())))
|
||||
.map(IdRemoval::getId).collect(Collectors.toSet());
|
||||
|
||||
log.info("Removed Ids: {}", idsToRemove);
|
||||
|
||||
entities.forEach(entity ->
|
||||
entity.getPositionSequences().forEach(ps -> {
|
||||
if (idsToRemove.contains(ps.getId())) {
|
||||
entity.setIgnored(true);
|
||||
}
|
||||
}));
|
||||
|
||||
}
|
||||
|
||||
return new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
|
||||
@ -811,6 +811,41 @@ public class RedactionIntegrationTest {
|
||||
assertThat(cbiAddressAfterHintRemoval.isRedacted()).isTrue();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Runs a single analysis of {@code files/new/test-ignore-hint.pdf} with a set of manual
 * redactions that contains one approved {@code IdRemoval}, then fetches the redaction log
 * with the same manual redactions and asserts that an entry of type {@code CBI_Address}
 * is reported as redacted.
 *
 * NOTE(review): judging by the test name, the removed id presumably belongs to a hint in
 * the test PDF — confirm against the fixture.
 */
@Test
|
||||
public void testIgnoreHintFirstAnalysis() {
|
||||
|
||||
System.out.println("testIgnoreHintFirstAnalysis");
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf");
|
||||
|
||||
// Build the manual redactions: one approved id removal whose request and processed
// dates are both set to the current time.
var manualRedactions = ManualRedactions.builder().idsToRemove(Sets.newLinkedHashSet(
|
||||
IdRemoval.builder()
|
||||
.id("c630599611e6e3db314518374bcf70f7")
|
||||
.status(Status.APPROVED)
|
||||
.user("test")
|
||||
.removeFromDictionary(false)
|
||||
.processedDate(OffsetDateTime.now())
|
||||
.requestDate(OffsetDateTime.now())
|
||||
.build())).build();
|
||||
|
||||
// The manual redactions are attached to the request before analyze(...) is called,
// so they are available to the analysis itself.
request.setManualRedactions(manualRedactions);
|
||||
reanalyzeService.analyze(request);
|
||||
|
||||
// Fetch the redaction log, passing the same manual redactions again.
var mergedRedactionLog = redactionController.getRedactionLog(RedactionRequest.builder().withSectionDataForManualRedactions(true)
|
||||
.manualRedactions(manualRedactions)
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
// findAny().get() throws if the log has no entry of type CBI_Address (compared
// case-insensitively), which fails the test before the assertion is reached.
var cbiAddressAfterHintRemoval = mergedRedactionLog.getRedactionLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get();
|
||||
assertThat(cbiAddressAfterHintRemoval.isRedacted()).isTrue();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableRedaction() throws IOException {
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user