From 8edaa93bda3415141e065b1c07107753ab8f2836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Thu, 13 Aug 2020 10:07:23 +0200 Subject: [PATCH 1/5] Log warning message if tabular data mismatches --- .../v1/server/redaction/service/EntityRedactionService.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index eef620e1..602df948 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -23,7 +23,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +@Slf4j @Service @RequiredArgsConstructor public class EntityRedactionService { @@ -113,7 +115,8 @@ public class EntityRedactionService { private Map toMap(List keys, List values) { if (keys.size() != values.size()) { - throw new RuntimeException("Cannot merge lists of unequal size."); + log.warn("Cannot merge lists of unequal size, returning empty map."); + return new HashMap<>(); } Map result = new HashMap<>(); for (int i = 0; i < keys.size(); i++) { From 5542a97a381465535c0e5b25a625b685566226de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Thu, 13 Aug 2020 10:14:11 +0200 Subject: [PATCH 2/5] Add test redacting all files and expecting no exception --- .../v1/server/RedactionIntegrationTest.java | 62 ++++++++++++++++--- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 9e2771ae..9a05bdd3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -5,6 +5,8 @@ import static org.springframework.boot.test.context.SpringBootTest.WebEnvironmen import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; @@ -19,7 +21,6 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.kie.api.KieServices; @@ -48,7 +49,6 @@ import com.iqser.red.service.redaction.v1.server.controller.RedactionController; import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; -@Ignore @RunWith(SpringRunner.class) @SpringBootTest(webEnvironment = DEFINED_PORT) public class RedactionIntegrationTest { @@ -219,11 +219,51 @@ public class RedactionIntegrationTest { } + @Test + public void noExceptionShouldBeThrownForAnyFiles() throws IOException { + + ClassLoader loader = getClass().getClassLoader(); + URL url = loader.getResource("files"); + File[] files = new File(url.getPath()).listFiles(); + List input = new ArrayList<>(); + for (File file : files) { + input.addAll(getPathsRecursively(file)); + } + for (File path : input) { + RedactionRequest request = RedactionRequest.builder() + .document(IOUtils.toByteArray(new FileInputStream(path))) + .build(); + System.out.println("Redacting file : " + path.getName()); + redactionController.redact(request); + } + + } + + + private List getPathsRecursively(File path) { + + List result = new ArrayList<>(); + if (path == null || path.listFiles() == null) { + return result; + } + for (File f : path.listFiles()) { + if (f.isFile()) { + result.add(f); + } else { + result.addAll(getPathsRecursively(f)); + } + } + return result; + + } + + @Test public void redactionTest() throws IOException { long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S" + + "-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -241,6 +281,7 @@ public class RedactionIntegrationTest { System.out.println("numberOfPages: " + result.getNumberOfPages()); } + @Test public void testTableRedaction() throws IOException { @@ -266,7 +307,8 @@ public class RedactionIntegrationTest { @Test public void classificationTest() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -283,7 +325,8 @@ public class RedactionIntegrationTest { @Test public void sectionsTest() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -300,7 +343,8 @@ public class RedactionIntegrationTest { @Test public void htmlTablesTest() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -317,7 +361,8 @@ public class RedactionIntegrationTest { @Test public void htmlTableRotationTest() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S" + + "-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -337,7 +382,8 @@ public class RedactionIntegrationTest { if (resource == null) { throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl"); } - try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) { + try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), + StandardCharsets.UTF_8))) { StringBuilder sb = new StringBuilder(); String str; while ((str = br.readLine()) != null) { From a151a13b4cc21718dd9c34f8991af2b624ec3f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Thu, 13 Aug 2020 11:02:09 +0200 Subject: [PATCH 3/5] Fix NPE for empty cells --- .../server/redaction/service/EntityRedactionService.java | 7 ++++++- .../redaction/v1/server/RedactionIntegrationTest.java | 3 +-- .../src/test/resources/drools/rules.drl | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 602df948..3118e003 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -120,7 +120,12 @@ public class EntityRedactionService { } Map result = new HashMap<>(); for (int i = 0; i < keys.size(); i++) { - result.put(keys.get(i), values.get(i)); + String value = values.get(i); + if (value == null) { + log.warn("Drools does not support null values."); + continue; + } + result.put(keys.get(i), value); } return result; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 9a05bdd3..556d1cc6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -262,8 +262,7 @@ public class RedactionIntegrationTest { public void redactionTest() throws IOException { long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S" + - "-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_21_Volume_3CP_A9396G_B-9_2018-09-06.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 8d13e9f0..e89e8eb7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -101,7 +101,7 @@ rule "8: Redact contact information, if Producer is found" rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study" when - Section(tabularData != null && tabularData.size() > 0 + Section(tabularData != null && tabularData.containsKey("Vertebrate study Y/N") && tabularData.get("Vertebrate study Y/N").equals("Y") ) From 32aa500983b36569b6ee20d822693b3a2c9d7d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Thu, 13 Aug 2020 11:04:56 +0200 Subject: [PATCH 4/5] Remove redundant warn message --- .../v1/server/redaction/service/EntityRedactionService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 3118e003..e5593157 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -122,7 +122,6 @@ public class EntityRedactionService { for (int i = 0; i < keys.size(); i++) { String value = values.get(i); if (value == null) { - log.warn("Drools does not support null values."); continue; } result.put(keys.get(i), value); From 17aabcd09c1f76bdd467f66aac0cf243c0625734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Thu, 13 Aug 2020 11:13:01 +0200 Subject: [PATCH 5/5] Fix index out of bounds exception --- .../redaction/v1/server/tableextraction/model/Table.java | 3 +++ .../service/redaction/v1/server/RedactionIntegrationTest.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java index 1260bfd8..14d2f7d2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java @@ -81,6 +81,9 @@ public class Table extends AbstractTextContainer { private List computeHeaders() { boolean allBold = true; + if (rows.isEmpty()) { + return Collections.emptyList(); + } List rowCells = rows.get(0); for (Cell cell : rowCells) { if (cell == null || CollectionUtils.isEmpty(cell.getTextBlocks()) || diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 556d1cc6..1683b86f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -262,7 +262,7 @@ public class RedactionIntegrationTest { public void redactionTest() throws IOException { long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_21_Volume_3CP_A9396G_B-9_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/96 Trinexapac-ethyl_RAR_09_Volume_3CA_B-7_2018-02-23.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))