diff --git a/.gitignore b/.gitignore index 0671615..ef2a133 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,7 @@ build/ ### VS Code ### .vscode/ +gradlew.bat +gradlew +gradle.properties +gradle/ diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java index cd4ff06..839075d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java @@ -4,6 +4,7 @@ import java.awt.color.CMMException; import java.awt.geom.Point2D; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.pdfbox.contentstream.operator.Operator; @@ -219,7 +220,11 @@ public class PDFLinesTextStripper extends PDFTextStripper { // This is a quick and dirt hack // Happens for file 216.pdf log.debug(e.getMessage()); - return color.getComponents()[0] == 0 && color.getComponents()[1] == 0 && color.getComponents()[2] == 0 && color.getComponents()[1] == 1; + var result = true; + for (var component : color.getComponents()) { + result = result && component == 0; + } + return result; } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java index 10db93f..965f89d 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java @@ -1,5 +1,7 @@ package com.knecon.fforesight.service.layoutparser.server; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + import java.awt.Color; import java.io.File; import java.io.FileInputStream; @@ -17,6 +19,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.io.ClassPathResource; import com.fasterxml.jackson.databind.ObjectMapper; import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData; @@ -56,6 +59,18 @@ public class BdrJsonBuildTest extends AbstractTest { } + @Test + @SneakyThrows + public void testBDRFile199865() { + + File file = new ClassPathResource("files/bdr/Drucksache_19_9865.pdf").getFile(); + Document document = buildGraph(file); + ResearchDocumentData researchDocumentData = TaasDocumentDataMapper.fromDocument(document); + assertThat(researchDocumentData).isNotNull(); + + } + + @Test @Disabled public void writeBDRDocumentData() throws IOException { diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/bdr/Drucksache_19_9865.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/bdr/Drucksache_19_9865.pdf new file mode 100644 index 0000000..eeefafa Binary files /dev/null and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/bdr/Drucksache_19_9865.pdf differ