Pull request #35: Fix redaction in single cell tables
Merge in RED/redaction-service from RED-279 to master.
* commit '3d5455d7297e5805ad30a578d00d6d1737820c7d':
  Fix redaction in single cell tables
This commit is contained in:
commit
34e058c4e4
@ -52,12 +52,13 @@ public class EntityRedactionService {
|
||||
List<Table> tables = paragraph.getTables();
|
||||
|
||||
for (Table table : tables) {
|
||||
boolean singleCellTable = table.getRowCount() == 1 && table.getColCount() == 1;
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
SearchableText searchableRow = new SearchableText();
|
||||
Map<String, CellValue> tabularData = new HashMap<>();
|
||||
int start = 0;
|
||||
for (Cell cell : row) {
|
||||
if (cell.isHeaderCell() || CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
if (!singleCellTable && cell.isHeaderCell() || CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
continue;
|
||||
}
|
||||
addSectionToManualRedactions(cell.getTextBlocks(), manualRedactions, table.getHeadline(), sectionNumber);
|
||||
|
||||
@ -57,6 +57,7 @@ public class EntityRedactionServiceTest {
|
||||
private static final String ADDRESS_CODE = "address";
|
||||
|
||||
private static final AtomicLong DICTIONARY_VERSION = new AtomicLong();
|
||||
private static final AtomicLong RULES_VERSION = new AtomicLong();
|
||||
@MockBean
|
||||
private DictionaryClient dictionaryClient;
|
||||
|
||||
@ -69,6 +70,9 @@ public class EntityRedactionServiceTest {
|
||||
@Autowired
|
||||
private PdfSegmentationService pdfSegmentationService;
|
||||
|
||||
@Autowired
|
||||
private DroolsExecutionService droolsExecutionService;
|
||||
|
||||
@TestConfiguration
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@ -185,7 +189,7 @@ public class EntityRedactionServiceTest {
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // two pages
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 9)
|
||||
.count()).isEqualTo(10);
|
||||
@ -194,6 +198,60 @@ public class EntityRedactionServiceTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testApplicantInTableRedaction() throws IOException {
|
||||
|
||||
String tableRules = "package drools\n" +
|
||||
"\n" +
|
||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||
"\n" +
|
||||
"global Section section\n" +
|
||||
"rule \"6: Redact contact information if applicant is found\"\n" +
|
||||
" when\n" +
|
||||
" eval(section.headlineContainsWord(\"applicant\") || section.getText().contains(\"Applicant\"));\n" +
|
||||
" then\n" +
|
||||
" section.redactLineAfter(\"Name:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactBetween(\"Address:\", \"Contact\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Contact point:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Phone:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Fax:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Tel.:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Tel:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"E-mail:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Email:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Contact:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Telephone number:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Fax number:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactLineAfter(\"Telephone:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactBetween(\"No:\", \"Fax\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6, \"Applicant information was found\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules();
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/names.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/addresses.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 6)
|
||||
.count()).isEqualTo(18);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void headerPropagation() throws IOException {
|
||||
|
||||
@ -268,7 +326,7 @@ public class EntityRedactionServiceTest {
|
||||
" section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
||||
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion()).thenReturn(1L);
|
||||
when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
TypeResponse typeResponse = TypeResponse.builder()
|
||||
.types(Arrays.asList(
|
||||
|
||||
@ -49,7 +49,7 @@ rule "5: Do not redact in guideline sections"
|
||||
section.redactNot("address", 5, "Section is a guideline section.");
|
||||
end
|
||||
|
||||
rule "6: Redact contact information, if applicant is found"
|
||||
rule "6: Redact contact information if applicant is found"
|
||||
when
|
||||
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant"));
|
||||
then
|
||||
@ -70,7 +70,7 @@ rule "6: Redact contact information, if applicant is found"
|
||||
section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found");
|
||||
end
|
||||
|
||||
rule "7: Redact contact information, if Producer is found"
|
||||
rule "7: Redact contact information if Producer is found"
|
||||
when
|
||||
eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance"));
|
||||
then
|
||||
@ -110,7 +110,7 @@ rule "9: Redact if must redact entry is found"
|
||||
end
|
||||
|
||||
|
||||
rule "10: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
|
||||
rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate study"
|
||||
when
|
||||
Section(rowEquals("Vertebrate study Y/N", "Y"))
|
||||
then
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user