Pull request #23: Log warning message if tabular data mismatches

Merge in RED/redaction-service from RED-101-quickfix to master

* commit '17aabcd09c1f76bdd467f66aac0cf243c0625734':
  Fix index out of bounds exception
  Remove redundant warn message
  Fix NPE for empty cells
  Add test redacting all files and expecting no exception
  Log warning message if tabular data mismatches
This commit is contained in:
Thierry Goeckel 2020-08-13 11:33:17 +02:00
commit 954765759c
4 changed files with 66 additions and 11 deletions

View File

@ -23,7 +23,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class EntityRedactionService {
@ -113,11 +115,16 @@ public class EntityRedactionService {
private Map<String, String> toMap(List<String> keys, List<String> values) {
if (keys.size() != values.size()) {
throw new RuntimeException("Cannot merge lists of unequal size.");
log.warn("Cannot merge lists of unequal size, returning empty map.");
return new HashMap<>();
}
Map<String, String> result = new HashMap<>();
for (int i = 0; i < keys.size(); i++) {
result.put(keys.get(i), values.get(i));
String value = values.get(i);
if (value == null) {
continue;
}
result.put(keys.get(i), value);
}
return result;

View File

@ -81,6 +81,9 @@ public class Table extends AbstractTextContainer {
private List<String> computeHeaders() {
boolean allBold = true;
if (rows.isEmpty()) {
return Collections.emptyList();
}
List<Cell> rowCells = rows.get(0);
for (Cell cell : rowCells) {
if (cell == null || CollectionUtils.isEmpty(cell.getTextBlocks()) ||

View File

@ -5,6 +5,8 @@ import static org.springframework.boot.test.context.SpringBootTest.WebEnvironmen
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
@ -19,7 +21,6 @@ import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
@ -48,7 +49,6 @@ import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
@Ignore
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = DEFINED_PORT)
public class RedactionIntegrationTest {
@ -219,11 +219,50 @@ public class RedactionIntegrationTest {
}
/**
 * Smoke test: sends every file collected from the entries of the {@code files}
 * classpath directory through the redaction controller and expects that no
 * exception is thrown.
 *
 * NOTE(review): entries that are regular files directly inside {@code files} yield
 * nothing from {@code getPathsRecursively} (it only lists directory contents), so only
 * files inside sub-directories are redacted here - confirm this is intended.
 *
 * @throws IOException if a test file cannot be read
 */
@Test
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
    ClassLoader loader = getClass().getClassLoader();
    URL url = loader.getResource("files");
    if (url == null) {
        // Fail with a clear message instead of a bare NullPointerException.
        throw new IllegalArgumentException("could not load classpath resource: files");
    }

    File[] files = new File(url.getPath()).listFiles();
    if (files == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IllegalArgumentException("classpath resource is not a readable directory: " + url.getPath());
    }

    List<File> input = new ArrayList<>();
    for (File file : files) {
        input.addAll(getPathsRecursively(file));
    }

    for (File path : input) {
        byte[] document;
        // Close the stream explicitly so the test does not leak one file handle per
        // redacted file.
        try (InputStream in = new FileInputStream(path)) {
            document = IOUtils.toByteArray(in);
        }
        RedactionRequest request = RedactionRequest.builder()
                .document(document)
                .build();
        System.out.println("Redacting file : " + path.getName());
        redactionController.redact(request);
    }
}
/**
 * Collects all regular files below the given directory, descending into
 * sub-directories.
 *
 * @param path directory to scan; may be {@code null}
 * @return the files found; empty if {@code path} is {@code null} or
 *         {@code path.listFiles()} returns {@code null} (e.g. {@code path} is not a directory)
 */
private List<File> getPathsRecursively(File path) {
    List<File> result = new ArrayList<>();
    if (path == null) {
        return result;
    }
    // List the directory exactly once: a second listFiles() call could return null
    // if the directory changed in between, and it would read the directory twice.
    File[] children = path.listFiles();
    if (children == null) {
        return result;
    }
    for (File child : children) {
        if (child.isFile()) {
            result.add(child);
        } else {
            result.addAll(getPathsRecursively(child));
        }
    }
    return result;
}
@Test
public void redactionTest() throws IOException {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/96 Trinexapac-ethyl_RAR_09_Volume_3CA_B-7_2018-02-23.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -241,6 +280,7 @@ public class RedactionIntegrationTest {
System.out.println("numberOfPages: " + result.getNumberOfPages());
}
@Test
public void testTableRedaction() throws IOException {
@ -266,7 +306,8 @@ public class RedactionIntegrationTest {
@Test
public void classificationTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -283,7 +324,8 @@ public class RedactionIntegrationTest {
@Test
public void sectionsTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -300,7 +342,8 @@ public class RedactionIntegrationTest {
@Test
public void htmlTablesTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -317,7 +360,8 @@ public class RedactionIntegrationTest {
@Test
public void htmlTableRotationTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S" +
"-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -337,7 +381,8 @@ public class RedactionIntegrationTest {
if (resource == null) {
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
}
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(),
StandardCharsets.UTF_8))) {
StringBuilder sb = new StringBuilder();
String str;
while ((str = br.readLine()) != null) {

View File

@ -101,7 +101,7 @@ rule "8: Redact contact information, if Producer is found"
rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
when
Section(tabularData != null && tabularData.size() > 0
Section(tabularData != null
&& tabularData.containsKey("Vertebrate study Y/N")
&& tabularData.get("Vertebrate study Y/N").equals("Y")
)