Adjust test to added rule and fix vertical header propagation for row > 2
This commit is contained in:
Thierry Göckel 2020-08-25 15:41:44 +02:00
parent 848c506c3f
commit 3c590dcf1d
2 changed files with 42 additions and 4 deletions

View File

@ -115,8 +115,12 @@ public class Table extends AbstractTextContainer {
cell.getHeaderCells().add(lastHeaderCell); cell.getHeaderCells().add(lastHeaderCell);
} }
List<Cell> cellsToTheTop = new ArrayList<>(); List<Cell> cellsToTheTop = new ArrayList<>();
for (int i = rowIndex - 1; i >= 0; i--) { for (int i = 0; i < rowIndex; i++) {
cellsToTheTop.add(rows.get(i).get(colIndex)); try {
cellsToTheTop.add(rows.get(i).get(colIndex));
} catch (IndexOutOfBoundsException e) {
log.warn("No cell {} in row {}, ignoring.", colIndex, rowIndex);
}
} }
for (Cell topCell : cellsToTheTop) { for (Cell topCell : cellsToTheTop) {
if (topCell.isHeaderCell()) { if (topCell.isHeaderCell()) {

View File

@ -130,7 +130,7 @@ public class EntityRedactionServiceTest {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, null); entityRedactionService.processDocument(classifiedDoc, null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1 assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 names, 1 address, 1 Y and 2 N entities
} }
} }
@ -193,6 +193,7 @@ public class EntityRedactionServiceTest {
} }
@Test @Test
public void headerPropagation() throws IOException { public void headerPropagation() throws IOException {
@ -219,6 +220,31 @@ public class EntityRedactionServiceTest {
} }
/**
 * Runs segmentation and redaction over the "Empty Tabular Data" sample PDF with
 * stubbed name and address dictionaries, then checks how many of the resulting
 * entities were matched by rule 8.
 *
 * @throws IOException if the sample PDF cannot be read from the classpath
 */
@Test
public void testNGuideline() throws IOException {
    // Dictionary stubs: one known author name and one known address.
    DictionaryResponse nameResponse = DictionaryResponse.builder()
            .entries(Collections.singletonList("Aldershof S."))
            .build();
    DictionaryResponse addressResponse = DictionaryResponse.builder()
            .entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
            .build();

    // A fresh version number is returned on every run of this test.
    // NOTE(review): presumably this forces the service to reload the dictionaries - confirm.
    when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
    when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(nameResponse);
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);

    ClassPathResource samplePdf = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf");
    try (PDDocument loadedPdf = PDDocument.load(samplePdf.getInputStream())) {
        Document classified = pdfSegmentationService.parseDocument(loadedPdf);
        entityRedactionService.processDocument(classified, null);

        assertThat(classified.getEntities()).hasSize(1); // one page

        // Count the entities stored under key 1 whose matched rule is number 8.
        long matchedByRuleEight = classified.getEntities()
                .get(1)
                .stream()
                .filter(found -> found.getMatchedRule() == 8)
                .count();
        assertThat(matchedByRuleEight).isEqualTo(6);
    }
}
@Before @Before
public void stubRedaction() { public void stubRedaction() {
String tableRules = "package drools\n" + String tableRules = "package drools\n" +
@ -226,12 +252,20 @@ public class EntityRedactionServiceTest {
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" + "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
"\n" + "\n" +
"global Section section\n" + "global Section section\n" +
"rule \"8: Not redacted because Vertebrate Study = N\"\n" +
" when\n" +
" Section(rowEquals(\"Vertebrate study Y/N\", \"N\"))\n" +
" then\n" +
" section.redactNot(\"name\", 8, \"Not redacted because row is not a vertebrate study\");\n" +
" section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n" +
" section.highlightCell(\"Vertebrate study Y/N\", 8, \"hint_only\");\n" +
" end\n" +
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" + "rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
" when\n" + " when\n" +
" Section(rowEquals(\"Vertebrate study Y/N\", \"Y\"))\n" + " Section(rowEquals(\"Vertebrate study Y/N\", \"Y\"))\n" +
" then\n" + " then\n" +
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" + " section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" + " section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\");\n" +
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" + " section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
" end"; " end";
when(rulesClient.getVersion()).thenReturn(1L); when(rulesClient.getVersion()).thenReturn(1L);