Adjust test to added rule and fix vertical header propagation for row > 2
This commit is contained in:
Thierry Göckel 2020-08-25 15:41:44 +02:00
parent 848c506c3f
commit 3c590dcf1d
2 changed files with 42 additions and 4 deletions

View File

@ -115,8 +115,12 @@ public class Table extends AbstractTextContainer {
cell.getHeaderCells().add(lastHeaderCell);
}
List<Cell> cellsToTheTop = new ArrayList<>();
for (int i = rowIndex - 1; i >= 0; i--) {
cellsToTheTop.add(rows.get(i).get(colIndex));
for (int i = 0; i < rowIndex; i++) {
try {
cellsToTheTop.add(rows.get(i).get(colIndex));
} catch (IndexOutOfBoundsException e) {
log.warn("No cell {} in row {}, ignoring.", colIndex, rowIndex);
}
}
for (Cell topCell : cellsToTheTop) {
if (topCell.isHeaderCell()) {

View File

@ -130,7 +130,7 @@ public class EntityRedactionServiceTest {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 names, 1 address, 1 Y and 2 N entities
}
}
@ -193,6 +193,7 @@ public class EntityRedactionServiceTest {
}
@Test
public void headerPropagation() throws IOException {
@ -219,6 +220,31 @@ public class EntityRedactionServiceTest {
}
/**
 * Runs segmentation and entity redaction on the "Empty Tabular Data" sample PDF
 * with a stubbed one-entry name dictionary and a stubbed one-entry address
 * dictionary, then checks that the result holds exactly one entry and that six
 * of the entities retrieved with {@code get(1)} report matched rule 8.
 *
 * @throws IOException if the PDF resource cannot be opened or loaded
 */
@Test
public void testNGuideline() throws IOException {
    ClassPathResource samplePdf = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf");

    // Build both dictionary payloads up front; the stubbing order below is unchanged.
    DictionaryResponse nameDictionary = DictionaryResponse.builder()
            .entries(Collections.singletonList("Aldershof S."))
            .build();
    DictionaryResponse addressDictionary = DictionaryResponse.builder()
            .entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
            .build();

    // Each call hands out a fresh version number from the shared counter.
    when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
    when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(nameDictionary);
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressDictionary);

    try (PDDocument loadedPdf = PDDocument.load(samplePdf.getInputStream())) {
        Document parsedDocument = pdfSegmentationService.parseDocument(loadedPdf);
        entityRedactionService.processDocument(parsedDocument, null);

        assertThat(parsedDocument.getEntities()).hasSize(1); // one page

        long matchedByRuleEight = parsedDocument.getEntities()
                .get(1)
                .stream()
                .filter(found -> found.getMatchedRule() == 8)
                .count();
        assertThat(matchedByRuleEight).isEqualTo(6);
    }
}
@Before
public void stubRedaction() {
String tableRules = "package drools\n" +
@ -226,12 +252,20 @@ public class EntityRedactionServiceTest {
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
"\n" +
"global Section section\n" +
"rule \"8: Not redacted because Vertebrate Study = N\"\n" +
" when\n" +
" Section(rowEquals(\"Vertebrate study Y/N\", \"N\"))\n" +
" then\n" +
" section.redactNot(\"name\", 8, \"Not redacted because row is not a vertebrate study\");\n" +
" section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n" +
" section.highlightCell(\"Vertebrate study Y/N\", 8, \"hint_only\");\n" +
" end\n" +
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
" when\n" +
" Section(rowEquals(\"Vertebrate study Y/N\", \"Y\"))\n" +
" then\n" +
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
" section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\");\n" +
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
" end";
when(rulesClient.getVersion()).thenReturn(1L);