diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java index 3f0f32e1..560d5b2c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java @@ -22,6 +22,7 @@ import org.apache.pdfbox.text.TextPosition; import java.awt.geom.Point2D; import java.io.IOException; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; @Slf4j @@ -200,6 +201,8 @@ public class PDFLinesTextStripper extends PDFTextStripper { int startIndex = 0; RedTextPosition previous = null; + textPositions.sort(Comparator.comparing(TextPosition::getXDirAdj)); + for (int i = 0; i <= textPositions.size() - 1; i++) { if (!textPositionSequences.isEmpty()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/FileUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/FileUtils.java index 209d5f58..aa8cd148 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/FileUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/FileUtils.java @@ -13,7 +13,6 @@ public class FileUtils { public File createTempFile(String filenamePrefix, String filenameSuffix) throws IOException { - System.out.println(filenamePrefix + " " + filenameSuffix); File tempFile = Files.createTempFile(filenamePrefix, filenameSuffix).toFile(); setRWPermissionsOnlyForOwner(tempFile); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 38360d8b..9898a85c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -364,7 +364,7 @@ public class RedactionIntegrationTest { @Test public void titleExtraction() throws IOException { - AnalyzeRequest request = prepareStorage("files/RSS/06 - Isopyrazam - Acute Oral Toxicity Rat.pdf"); + AnalyzeRequest request = prepareStorage("files/new/table-with-merged-cells.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1174,7 +1174,7 @@ public class RedactionIntegrationTest { public void htmlTablesTest() throws IOException { System.out.println("htmlTablesTest"); - AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf"); + AnalyzeRequest request = prepareStorage("files/new/table-with-merged-cells.pdf"); RedactionRequest redactionRequest = RedactionRequest.builder() .dossierId(request.getDossierId()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 2b220fbb..24d3c8d1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -397,4 +397,11 @@ rule "102: Guidelines FileAttributes" Section((text.contains("DATA REQUIREMENT(S):") || text.contains("TEST GUIDELINE(S):")) && (text.contains("OECD") || text.contains("EPA") || text.contains("OPPTS"))) then section.addFileAttribute("OECD Number", "OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", false, 0); + end + +rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)" + when + Section(hasTableHeader("h5.1")) + then + section.redactCell("h5.1", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/StrangeOneString.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/StrangeOneString.pdf new file mode 100644 index 00000000..c8b8d635 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/StrangeOneString.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/table-with-merged-cells.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/table-with-merged-cells.pdf new file mode 100644 index 00000000..d1799c6c Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/table-with-merged-cells.pdf differ