Pull request #301: RED-3086: Fixed overlapping rule and dictionary entries

Merge in RED/redaction-service from RED-3086 to release/2.52.x

* commit 'a62c94b34411abff6106c6d9239a1ec40f873d08':
  RED-3086: Fixed overlapping rule and dictionary entries
This commit is contained in:
Dominique Eiflaender 2021-12-15 10:42:47 +01:00
commit 4f5210d91c
4 changed files with 11 additions and 1 deletion

View File

@@ -159,6 +159,8 @@ public class EntitySearchUtils {
// HashSet keeps old value but we want the new.
entities.removeAll(found);
entities.addAll(found);
removeEntitiesContainedInLarger(entities);
}

View File

@@ -673,7 +673,7 @@ public class RedactionIntegrationTest {
public void redactionTest() throws IOException {
long start = System.currentTimeMillis();
- ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
+ ClassPathResource pdfFileResource = new ClassPathResource("files/new/just-a-document-20211215.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setExcludedPages(Set.of(1));

View File

@@ -1655,3 +1655,11 @@ Zyma SA, Nyon, Switzerland
Mambo-Tox Ltd. Biomedical Sciences Building Bassett Crescent East Southampton SO16 7PX UK
Syngenta Environmental Sciences Jealotts Hill International Research Centre Bracknell, Berkshire RG42 6EY UK
Test Ignored Hint CBI_ADDRESS
CTL
CTL - Central Toxicology Laboratory
CTL/P/3557
CTL/P/3607
CTL/P/S 161
CTL/Pl4333
CTL/with dictionary entry