Adjust test to added rule and fix vertical header propagation for row > 2
This commit is contained in:
parent
848c506c3f
commit
3c590dcf1d
@ -115,8 +115,12 @@ public class Table extends AbstractTextContainer {
|
|||||||
cell.getHeaderCells().add(lastHeaderCell);
|
cell.getHeaderCells().add(lastHeaderCell);
|
||||||
}
|
}
|
||||||
List<Cell> cellsToTheTop = new ArrayList<>();
|
List<Cell> cellsToTheTop = new ArrayList<>();
|
||||||
for (int i = rowIndex - 1; i >= 0; i--) {
|
for (int i = 0; i < rowIndex; i++) {
|
||||||
cellsToTheTop.add(rows.get(i).get(colIndex));
|
try {
|
||||||
|
cellsToTheTop.add(rows.get(i).get(colIndex));
|
||||||
|
} catch (IndexOutOfBoundsException e) {
|
||||||
|
log.warn("No cell {} in row {}, ignoring.", colIndex, rowIndex);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (Cell topCell : cellsToTheTop) {
|
for (Cell topCell : cellsToTheTop) {
|
||||||
if (topCell.isHeaderCell()) {
|
if (topCell.isHeaderCell()) {
|
||||||
|
|||||||
@ -130,7 +130,7 @@ public class EntityRedactionServiceTest {
|
|||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
entityRedactionService.processDocument(classifiedDoc, null);
|
entityRedactionService.processDocument(classifiedDoc, null);
|
||||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1
|
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 names, 1 address, 1 Y and 2 N entities
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,6 +193,7 @@ public class EntityRedactionServiceTest {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void headerPropagation() throws IOException {
|
public void headerPropagation() throws IOException {
|
||||||
|
|
||||||
@ -219,6 +220,31 @@ public class EntityRedactionServiceTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNGuideline() throws IOException {
|
||||||
|
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf");
|
||||||
|
|
||||||
|
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||||
|
.entries(Collections.singletonList("Aldershof S."))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||||
|
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||||
|
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||||
|
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||||
|
.build();
|
||||||
|
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||||
|
|
||||||
|
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||||
|
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
|
entityRedactionService.processDocument(classifiedDoc, null);
|
||||||
|
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||||
|
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void stubRedaction() {
|
public void stubRedaction() {
|
||||||
String tableRules = "package drools\n" +
|
String tableRules = "package drools\n" +
|
||||||
@ -226,12 +252,20 @@ public class EntityRedactionServiceTest {
|
|||||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"global Section section\n" +
|
"global Section section\n" +
|
||||||
|
"rule \"8: Not redacted because Vertebrate Study = N\"\n" +
|
||||||
|
" when\n" +
|
||||||
|
" Section(rowEquals(\"Vertebrate study Y/N\", \"N\"))\n" +
|
||||||
|
" then\n" +
|
||||||
|
" section.redactNot(\"name\", 8, \"Not redacted because row is not a vertebrate study\");\n" +
|
||||||
|
" section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n" +
|
||||||
|
" section.highlightCell(\"Vertebrate study Y/N\", 8, \"hint_only\");\n" +
|
||||||
|
" end\n" +
|
||||||
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
||||||
" when\n" +
|
" when\n" +
|
||||||
" Section(rowEquals(\"Vertebrate study Y/N\", \"Y\"))\n" +
|
" Section(rowEquals(\"Vertebrate study Y/N\", \"Y\"))\n" +
|
||||||
" then\n" +
|
" then\n" +
|
||||||
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
||||||
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
|
" section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
||||||
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
|
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
|
||||||
" end";
|
" end";
|
||||||
when(rulesClient.getVersion()).thenReturn(1L);
|
when(rulesClient.getVersion()).thenReturn(1L);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user