diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java index 1a278855..73418718 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java @@ -115,8 +115,12 @@ public class Table extends AbstractTextContainer { cell.getHeaderCells().add(lastHeaderCell); } List cellsToTheTop = new ArrayList<>(); - for (int i = rowIndex - 1; i >= 0; i--) { - cellsToTheTop.add(rows.get(i).get(colIndex)); + for (int i = 0; i < rowIndex; i++) { + try { + cellsToTheTop.add(rows.get(i).get(colIndex)); + } catch (IndexOutOfBoundsException e) { /* rows.get(i).get(colIndex) was out of range; skip that cell and report the row that was actually probed */ + log.warn("No cell {} in row {}, ignoring.", colIndex, i); + } } for (Cell topCell : cellsToTheTop) { if (topCell.isHeaderCell()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index da4ed03b..f29e83d3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -130,7 +130,7 @@ public class EntityRedactionServiceTest { Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); 
entityRedactionService.processDocument(classifiedDoc, null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page - assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1 + assertThat(classifiedDoc.getEntities().get(1)).hasSize(7); // 3 names, 1 address, 1 Y and 2 N entities } } @@ -193,6 +193,7 @@ public class EntityRedactionServiceTest { } + @Test public void headerPropagation() throws IOException { @@ -219,6 +220,31 @@ public class EntityRedactionServiceTest { } + @Test + public void testNGuideline() throws IOException { /* Stubs the name and address dictionaries, processes the "Empty Tabular Data.pdf" fixture, and expects six entities under key 1 to carry matched rule 8. */ + + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf"); + + DictionaryResponse dictionaryResponse = DictionaryResponse.builder() + .entries(Collections.singletonList("Aldershof S.")) + .build(); + + when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse); + DictionaryResponse addressResponse = DictionaryResponse.builder() + .entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")) + .build(); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse); + + try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) { + Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); + entityRedactionService.processDocument(classifiedDoc, null); + assertThat(classifiedDoc.getEntities()).hasSize(1); // one page + assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6); + } + } + + @Before public void stubRedaction() { String tableRules = "package drools\n" + @@ -226,12 +252,20 @@ public class EntityRedactionServiceTest { "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" + "\n" + "global Section section\n" + + "rule \"8: Not redacted because 
Vertebrate Study = N\"\n" + + " when\n" + + " Section(rowEquals(\"Vertebrate study Y/N\", \"N\"))\n" + + " then\n" + + " section.redactNot(\"name\", 8, \"Not redacted because row is not a vertebrate study\");\n" + + " section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n" + + " section.highlightCell(\"Vertebrate study Y/N\", 8, \"hint_only\");\n" + + " end\n" + "rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" + " when\n" + " Section(rowEquals(\"Vertebrate study Y/N\", \"Y\"))\n" + " then\n" + " section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" + - " section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" + + " section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\");\n" + " section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" + " end"; when(rulesClient.getVersion()).thenReturn(1L);