RED-5276: Fixed strange text-parsing behavior for the tables example document
This commit is contained in:
parent
c16b6d41d5
commit
16b04b5918
@ -22,6 +22,7 @@ import org.apache.pdfbox.text.TextPosition;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
@ -200,6 +201,8 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
int startIndex = 0;
|
||||
RedTextPosition previous = null;
|
||||
|
||||
textPositions.sort(Comparator.comparing(TextPosition::getXDirAdj));
|
||||
|
||||
for (int i = 0; i <= textPositions.size() - 1; i++) {
|
||||
|
||||
if (!textPositionSequences.isEmpty()) {
|
||||
|
||||
@ -13,7 +13,6 @@ public class FileUtils {
|
||||
|
||||
public File createTempFile(String filenamePrefix, String filenameSuffix) throws IOException {
|
||||
|
||||
System.out.println(filenamePrefix + " " + filenameSuffix);
|
||||
File tempFile = Files.createTempFile(filenamePrefix, filenameSuffix).toFile();
|
||||
setRWPermissionsOnlyForOwner(tempFile);
|
||||
|
||||
|
||||
@ -364,7 +364,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/RSS/06 - Isopyrazam - Acute Oral Toxicity Rat.pdf");
|
||||
AnalyzeRequest request = prepareStorage("files/new/table-with-merged-cells.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1174,7 +1174,7 @@ public class RedactionIntegrationTest {
|
||||
public void htmlTablesTest() throws IOException {
|
||||
|
||||
System.out.println("htmlTablesTest");
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf");
|
||||
AnalyzeRequest request = prepareStorage("files/new/table-with-merged-cells.pdf");
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.dossierId(request.getDossierId())
|
||||
|
||||
@ -397,4 +397,11 @@ rule "102: Guidelines FileAttributes"
|
||||
Section((text.contains("DATA REQUIREMENT(S):") || text.contains("TEST GUIDELINE(S):")) && (text.contains("OECD") || text.contains("EPA") || text.contains("OPPTS")))
|
||||
then
|
||||
section.addFileAttribute("OECD Number", "OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", false, 0);
|
||||
end
|
||||
|
||||
rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
|
||||
when
|
||||
Section(hasTableHeader("h5.1"))
|
||||
then
|
||||
section.redactCell("h5.1", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user