diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
index a5e7e0f4..b41f1123 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
@@ -52,12 +52,13 @@ public class EntityRedactionService {
List
tables = paragraph.getTables();
for (Table table : tables) {
+ boolean singleCellTable = table.getRowCount() == 1 && table.getColCount() == 1;
for (List| row : table.getRows()) {
SearchableText searchableRow = new SearchableText();
Map tabularData = new HashMap<>();
int start = 0;
for (Cell cell : row) {
- if (cell.isHeaderCell() || CollectionUtils.isEmpty(cell.getTextBlocks())) {
+ if (!singleCellTable && cell.isHeaderCell() || CollectionUtils.isEmpty(cell.getTextBlocks())) {
continue;
}
addSectionToManualRedactions(cell.getTextBlocks(), manualRedactions, table.getHeadline(), sectionNumber);
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
index f29e83d3..736c98bc 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
@@ -57,6 +57,7 @@ public class EntityRedactionServiceTest {
private static final String ADDRESS_CODE = "address";
private static final AtomicLong DICTIONARY_VERSION = new AtomicLong();
+ private static final AtomicLong RULES_VERSION = new AtomicLong();
@MockBean
private DictionaryClient dictionaryClient;
@@ -69,6 +70,9 @@ public class EntityRedactionServiceTest {
@Autowired
private PdfSegmentationService pdfSegmentationService;
+ @Autowired
+ private DroolsExecutionService droolsExecutionService;
+
@TestConfiguration
public static class RedactionIntegrationTestConfiguration {
@@ -185,7 +189,7 @@ public class EntityRedactionServiceTest {
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // two pages
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 9)
.count()).isEqualTo(10);
@@ -194,6 +198,60 @@ public class EntityRedactionServiceTest {
}
+ @Test
+ public void testApplicantInTableRedaction() throws IOException {
+ // Runs an inline copy of rule 6 against the "Applicant Producer Table" sample PDF and checks how many entities that rule produced.
+ String tableRules = "package drools\n" +
+ "\n" +
+ "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
+ "\n" +
+ "global Section section\n" +
+ "rule \"6: Redact contact information if applicant is found\"\n" +
+ " when\n" +
+ " eval(section.headlineContainsWord(\"applicant\") || section.getText().contains(\"Applicant\"));\n" +
+ " then\n" +
+ " section.redactLineAfter(\"Name:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactBetween(\"Address:\", \"Contact\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Contact point:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Phone:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Fax:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Tel.:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Tel:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"E-mail:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Email:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Contact:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Telephone number:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Fax number:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactLineAfter(\"Telephone:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactBetween(\"No:\", \"Fax\", \"address\", 6, \"Applicant information was found\");\n" +
+ " section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6, \"Applicant information was found\");\n" +
+ " end";
+ when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet()); // new version number per call; presumably makes updateRules() reload the rules - TODO confirm
+ when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
+ droolsExecutionService.updateRules();
+ // Load the sample PDF and stub the dictionary client: a new dictionary version plus the name and address entry lists.
+ ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
+ when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
+ .entries(new ArrayList<>(ResourceLoader.load("dictionaries/names.txt")))
+ .build();
+ when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
+ DictionaryResponse addressResponse = DictionaryResponse.builder()
+ .entries(new ArrayList<>(ResourceLoader.load("dictionaries/addresses.txt")))
+ .build();
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
+ try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) { // try-with-resources closes the PDF when the block exits
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
+ entityRedactionService.processDocument(classifiedDoc, null);
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1).stream() // entities stored under key 1 (presumably the page number - TODO confirm)
+ .filter(entity -> entity.getMatchedRule() == 6) // keep only entities whose matched rule is 6, the rule defined above
+ .count()).isEqualTo(18); // expected number of rule-6 entities for this sample document
+ }
+
+ }
+
+
@Test
public void headerPropagation() throws IOException {
@@ -268,7 +326,7 @@ public class EntityRedactionServiceTest {
" section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\");\n" +
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
" end";
- when(rulesClient.getVersion()).thenReturn(1L);
+ when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet());
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
TypeResponse typeResponse = TypeResponse.builder()
.types(Arrays.asList(
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
index e461ff8b..c991b0b5 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
@@ -49,7 +49,7 @@ rule "5: Do not redact in guideline sections"
section.redactNot("address", 5, "Section is a guideline section.");
end
-rule "6: Redact contact information, if applicant is found"
+rule "6: Redact contact information if applicant is found"
when
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant"));
then
@@ -70,7 +70,7 @@ rule "6: Redact contact information, if applicant is found"
section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found");
end
-rule "7: Redact contact information, if Producer is found"
+rule "7: Redact contact information if Producer is found"
when
eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance"));
then
@@ -110,7 +110,7 @@ rule "9: Redact if must redact entry is found"
end
-rule "10: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
+rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate study"
when
Section(rowEquals("Vertebrate study Y/N", "Y"))
then
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/Applicant Producer Table.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/Applicant Producer Table.pdf
new file mode 100644
index 00000000..7a878561
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/Applicant Producer Table.pdf differ
|