diff --git a/.gitignore b/.gitignore index 5b60bf8..0671615 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ target/ .settings .springBeans .sts4-cache +.gradle ### IntelliJ IDEA ### .idea diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 601d1c3..a683836 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ include: - project: 'gitlab/gitlab' ref: 'main' - file: 'ci-templates/maven_java.yml' \ No newline at end of file + file: 'ci-templates/gradle_java.yml' \ No newline at end of file diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar deleted file mode 100644 index bf82ff0..0000000 Binary files a/.mvn/wrapper/maven-wrapper.jar and /dev/null differ diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties deleted file mode 100644 index ca5ab4b..0000000 --- a/.mvn/wrapper/maven-wrapper.properties +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.7/apache-maven-3.8.7-bin.zip -wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.1/maven-wrapper-3.1.1.jar diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts new file mode 100644 index 0000000..cc02e63 --- /dev/null +++ b/buildSrc/build.gradle.kts @@ -0,0 +1,7 @@ +plugins { + `kotlin-dsl` +} + +repositories { + gradlePluginPortal() +} diff --git a/buildSrc/src/main/kotlin/com.knecon.fforesight.java-conventions.gradle.kts b/buildSrc/src/main/kotlin/com.knecon.fforesight.java-conventions.gradle.kts new file mode 100644 index 0000000..a64e24a --- /dev/null +++ b/buildSrc/src/main/kotlin/com.knecon.fforesight.java-conventions.gradle.kts @@ -0,0 +1,56 @@ +plugins { + java + pmd + checkstyle + jacoco +} + +group = "com.knecon.fforesight" +version = "0.1-SNAPSHOT" + +java.sourceCompatibility = JavaVersion.VERSION_17 +java.targetCompatibility = JavaVersion.VERSION_17 + +tasks.jacocoTestReport { + reports { + xml.required.set(false) + csv.required.set(false) + html.outputLocation.set(layout.buildDirectory.dir("jacocoHtml")) + } +} + +tasks.pmdMain { + pmd.ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml") +} + +tasks.pmdTest { + pmd.ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml") +} + +tasks.named("test") { + useJUnitPlatform() +} + +tasks.test { + finalizedBy(tasks.jacocoTestReport) // report is always generated after tests run +} + +tasks.jacocoTestReport { + dependsOn(tasks.test) // tests are required to run before generating the report + reports { + xml.required.set(true) + csv.required.set(false) + } +} + +repositories { + mavenLocal() + mavenCentral() + maven { + url = uri("https://nexus.knecon.com/repository/gindev/"); + credentials { + username = providers.gradleProperty("mavenUser").getOrNull(); + password = providers.gradleProperty("mavenPassword").getOrNull(); + } + } +} diff --git a/config/checkstyle/checkstyle.xml b/config/checkstyle/checkstyle.xml new file mode 100644 index 0000000..8faaf13 --- /dev/null +++ b/config/checkstyle/checkstyle.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/config/pmd/pmd.xml b/config/pmd/pmd.xml new file mode 100644 index 0000000..f843201 --- /dev/null +++ b/config/pmd/pmd.xml @@ -0,0 +1,21 @@ + + + + + Knecon ruleset checks the code for bad stuff + + + + + + + + + + + + + \ No newline at end of file diff --git a/config/pmd/test_pmd.xml b/config/pmd/test_pmd.xml new file mode 100644 index 0000000..9c74fe4 --- /dev/null +++ b/config/pmd/test_pmd.xml @@ -0,0 +1,23 @@ + + + + + Knecon test ruleset checks the code for bad stuff + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/layoutparser-service-image/pom.xml b/layoutparser-service-image/pom.xml deleted file mode 100644 index 8160ee5..0000000 --- a/layoutparser-service-image/pom.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - 4.0.0 - - - - com.knecon.fforesight - platform-docker-dependency - 0.1.0 - - - - com.knecon.fforesight - layoutparser-service-image - 0.1-SNAPSHOT - pom - - - layoutparser-service-server - ${service.server}.jar - false - ff - ${docker.image.prefix}/${service.server} - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - org.apache.maven.plugins - maven-resources-plugin - - - org.codehaus.mojo - exec-maven-plugin - - - io.fabric8 - docker-maven-plugin - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - download-platform-jar - prepare-package - - copy - - - - - ${project.groupId} - ${service.server} - ${project.version} - jar - true - ${platform.jar} - - - ${docker.build.directory} - - - - - - io.fabric8 - docker-maven-plugin - - - - ${docker.image.name} - - ${docker.build.directory} - - ${platform.jar} - - - ${docker.image.version} - latest - - - - - - - - - - diff --git a/layoutparser-service-image/src/main/docker/Dockerfile b/layoutparser-service-image/src/main/docker/Dockerfile deleted file mode 100644 index ca4a906..0000000 --- a/layoutparser-service-image/src/main/docker/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM red/base-image:2.0.2 - -ARG PLATFORM_JAR - -ENV PLATFORM_JAR ${PLATFORM_JAR} - -ENV USES_ELASTICSEARCH false - -COPY ["${PLATFORM_JAR}", "/"] diff --git a/layoutparser-service/layoutparser-service-internal-api/build.gradle.kts b/layoutparser-service/layoutparser-service-internal-api/build.gradle.kts new file mode 100644 index 0000000..f66c3b9 --- /dev/null +++ b/layoutparser-service/layoutparser-service-internal-api/build.gradle.kts @@ -0,0 +1,6 @@ +plugins { + id("com.knecon.fforesight.java-conventions") + id("io.freefair.lombok") version "8.1.0" +} + +description = "layoutparser-service-internal-api" diff --git a/layoutparser-service/layoutparser-service-internal-api/pom.xml b/layoutparser-service/layoutparser-service-internal-api/pom.xml deleted file mode 100755 index d52eecf..0000000 --- a/layoutparser-service/layoutparser-service-internal-api/pom.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - 4.0.0 - - - com.knecon.fforesight - layoutparser-service - 0.1-SNAPSHOT - - - layoutparser-service-internal-api - - - - com.google.guava - guava - ${guava.version} - - - - diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java index 3aec1ec..7dcd7cb 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java @@ -17,5 +17,4 @@ public class DocumentData { DocumentPositionData[] documentPositions; DocumentStructure documentStructure; - } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java index 18c7353..262ea55 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java @@ -4,12 +4,14 @@ import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; +import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; @Data @Builder +@NoArgsConstructor @AllArgsConstructor -@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +@FieldDefaults(level = AccessLevel.PRIVATE) public class DocumentPage { int number; diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java index 4a21644..aa60b9e 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java @@ -4,12 +4,14 @@ import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; +import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; @Data @Builder +@NoArgsConstructor @AllArgsConstructor -@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +@FieldDefaults(level = AccessLevel.PRIVATE) public class DocumentPositionData { Long id; diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java index 0fa7e28..97e49ec 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java @@ -8,14 +8,13 @@ import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; -import lombok.Getter; import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; @Data @Builder -@AllArgsConstructor @NoArgsConstructor +@AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) public class DocumentStructure { @@ -53,10 +52,11 @@ public class DocumentStructure { } + @Data @Builder - @Getter + @NoArgsConstructor @AllArgsConstructor - @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) + @FieldDefaults(level = AccessLevel.PRIVATE) public static class EntryData { NodeType type; diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java index 09b90d8..a3fd9d8 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java @@ -6,12 +6,14 @@ import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; +import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; @Data @Builder +@NoArgsConstructor @AllArgsConstructor -@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +@FieldDefaults(level = AccessLevel.PRIVATE) public class DocumentTextData { Long id; @@ -22,6 +24,4 @@ public class DocumentTextData { int end; int[] lineBreaks; - - } diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java index e5153dd..513dae8 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/TableData.java @@ -12,4 +12,5 @@ public class TableData { List rowData; Integer numberOfCols; Integer numberOfRows; + } diff --git a/layoutparser-service/layoutparser-service-processor/build.gradle.kts b/layoutparser-service/layoutparser-service-processor/build.gradle.kts new file mode 100644 index 0000000..213a237 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/build.gradle.kts @@ -0,0 +1,21 @@ +plugins { + id("com.knecon.fforesight.java-conventions") + id("io.freefair.lombok") version "8.1.0" +} + +dependencies { + implementation(project(":layoutparser-service-internal-api")) + + implementation("com.iqser.red.service:persistence-service-shared-api-v1:2.36.0") + implementation("com.knecon.fforesight:tenant-commons:0.10.0") + implementation("com.iqser.red.commons:storage-commons:2.1.0") + + implementation("org.apache.pdfbox:pdfbox:3.0.0-alpha2") + implementation("org.apache.pdfbox:pdfbox-tools:3.0.0-alpha2") + implementation("com.fasterxml.jackson.module:jackson-module-afterburner:2.15.0-rc2") + implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.15.0-rc2") + implementation("org.springframework.boot:spring-boot-starter-web:3.0.6") + implementation("org.springframework.boot:spring-boot-starter-amqp:3.0.6") +} + +description = "layoutparser-service-processor" diff --git a/layoutparser-service/layoutparser-service-processor/pom.xml b/layoutparser-service/layoutparser-service-processor/pom.xml deleted file mode 100644 index 8bcaeb5..0000000 --- a/layoutparser-service/layoutparser-service-processor/pom.xml +++ /dev/null @@ -1,71 +0,0 @@ - - - 4.0.0 - - - com.knecon.fforesight - layoutparser-service - 0.1-SNAPSHOT - - - layoutparser-service-processor - - - - com.iqser.red.service - persistence-service-shared-api-v1 - 2.36.0 - - - com.knecon.fforesight - tenant-commons - ${tennat-commons.version} - - - com.knecon.fforesight - layoutparser-service-internal-api - ${project.version} - - - com.iqser.red.commons - storage-commons - ${storage-commons.version} - - - org.apache.pdfbox - pdfbox - ${pdfbox.version} - - - org.apache.pdfbox - pdfbox-tools - ${pdfbox.version} - - - com.fasterxml.jackson.module - jackson-module-afterburner - ${jackson.version} - - - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - ${jackson.version} - - - org.springframework.boot - spring-boot-starter-web - - - org.springframework.boot - spring-boot-starter-amqp - - - org.junit.jupiter - junit-jupiter - RELEASE - test - - - - diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 5a61430..64351ec 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -48,40 +48,40 @@ public class LayoutParsingPipeline { public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { long start = System.currentTimeMillis(); - PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId()); - ImageServiceResponse imageServiceResponse = new ImageServiceResponse(); - if (layoutParsingRequest.imagesFileStorageId().isPresent()) { - imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.pageFileStorageId()); + try (PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId())) { + ImageServiceResponse imageServiceResponse = new ImageServiceResponse(); + if (layoutParsingRequest.imagesFileStorageId().isPresent()) { + imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.pageFileStorageId()); + } + + TableServiceResponse tableServiceResponse = new TableServiceResponse(); + if (layoutParsingRequest.tablesFileStorageId().isPresent()) { + tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.pageFileStorageId()); + } + + Document documentGraph = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse); + int numberOfPages = originDocument.getNumberOfPages(); + + layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph)); + layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph)); + + if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) { + var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph); + layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData); + } + + return LayoutParsingFinishedEvent.builder() + .identifier(layoutParsingRequest.identifier()) + .numberOfPages(numberOfPages) + .duration(System.currentTimeMillis() - start) + .message(format("Layout parsing is finished and files have been saved with Ids:\n Structure: %s\nText: %s\nPositions: %s\nPageData: %s", + layoutParsingRequest.structureFileStorageId(), + layoutParsingRequest.textBlockFileStorageId(), + layoutParsingRequest.positionBlockFileStorageId(), + layoutParsingRequest.pageFileStorageId())) + .build(); } - - TableServiceResponse tableServiceResponse = new TableServiceResponse(); - if (layoutParsingRequest.tablesFileStorageId().isPresent()) { - tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.pageFileStorageId()); - } - - Document documentGraph = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse); - int numberOfPages = originDocument.getNumberOfPages(); - originDocument.close(); - - layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph)); - layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph)); - - if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) { - var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph); - layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData); - } - - return LayoutParsingFinishedEvent.builder() - .identifier(layoutParsingRequest.identifier()) - .numberOfPages(numberOfPages) - .duration(System.currentTimeMillis() - start) - .message(format("Layout parsing is finished and files have been saved with Ids:\n Structure: %s\nText: %s\nPositions: %s\nPageData: %s", - layoutParsingRequest.structureFileStorageId(), - layoutParsingRequest.textBlockFileStorageId(), - layoutParsingRequest.positionBlockFileStorageId(), - layoutParsingRequest.pageFileStorageId())) - .build(); } @@ -115,7 +115,8 @@ public class LayoutParsingPipeline { long start = System.currentTimeMillis(); - ClassificationDocument classificationDocument = pdfParsingService.parseDocument(layoutParsingType, originDocument, + ClassificationDocument classificationDocument = pdfParsingService.parseDocument(layoutParsingType, + originDocument, cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse), imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse)); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/factory/SearchTextWithTextPositionFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/factory/SearchTextWithTextPositionFactory.java index afc179a..6a018ed 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/factory/SearchTextWithTextPositionFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/factory/SearchTextWithTextPositionFactory.java @@ -5,6 +5,7 @@ import java.awt.geom.Rectangle2D; import java.util.Collections; import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Objects; import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition; @@ -132,7 +133,7 @@ public class SearchTextWithTextPositionFactory { private static void addTextPositionWithFontType(RedTextPosition currentTextPosition, String fontType, List fontTypePositions, int stringIdx) { - if (currentTextPosition.getFontName().toLowerCase().contains(fontType)) { + if (currentTextPosition.getFontName().toLowerCase(Locale.ROOT).contains(fontType)) { fontTypePositions.add(stringIdx); } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/ImageType.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/ImageType.java index 49566d1..b0f9e59 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/ImageType.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/ImageType.java @@ -1,5 +1,7 @@ package com.knecon.fforesight.service.layoutparser.processor.graph.nodes; +import java.util.Locale; + public enum ImageType { LOGO, FORMULA, @@ -10,7 +12,7 @@ public enum ImageType { public static ImageType fromString(String imageType) { - return switch (imageType.toLowerCase()) { + return switch (imageType.toLowerCase(Locale.ROOT)) { case "logo" -> ImageType.LOGO; case "formula" -> ImageType.FORMULA; case "signature" -> ImageType.SIGNATURE; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/SemanticNode.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/SemanticNode.java index d76b7a2..68c6202 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/SemanticNode.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/SemanticNode.java @@ -6,6 +6,7 @@ import java.awt.geom.Rectangle2D; import java.util.Comparator; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -285,7 +286,7 @@ public interface SemanticNode { */ default boolean containsStringIgnoreCase(String string) { - return getTextBlock().getSearchText().toLowerCase().contains(string.toLowerCase()); + return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT)); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/Table.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/Table.java index 18118b5..dab94ed 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/Table.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/nodes/Table.java @@ -5,6 +5,7 @@ import static java.lang.String.format; import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Set; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -67,7 +68,7 @@ public class Table implements SemanticNode { */ public boolean rowContainsStringsIgnoreCase(Integer row, List strings) { - String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(); + String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT); return strings.stream().map(String::toLowerCase).allMatch(rowText::contains); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java index d4e58e8..807383b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java @@ -4,6 +4,7 @@ import java.awt.geom.AffineTransform; import java.awt.geom.Point2D; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.stream.Collectors; import org.apache.pdfbox.text.TextPosition; @@ -76,7 +77,7 @@ public class TextPositionSequence implements CharSequence { RedTextPosition textPosition = textPositionAt(index); String text = textPosition.getUnicode(); - return caseInSensitive ? text.toLowerCase().charAt(0) : text.charAt(0); + return caseInSensitive ? text.toLowerCase(Locale.ROOT).charAt(0) : text.charAt(0); } @@ -223,7 +224,7 @@ public class TextPositionSequence implements CharSequence { @JsonAttribute(ignore = true) public String getFont() { - return textPositions.get(0).getFontName().toLowerCase().replaceAll(",bold", "").replaceAll(",italic", ""); + return textPositions.get(0).getFontName().toLowerCase(Locale.ROOT).replaceAll(",bold", "").replaceAll(",italic", ""); } @@ -231,7 +232,7 @@ public class TextPositionSequence implements CharSequence { @JsonAttribute(ignore = true) public String getFontStyle() { - String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase(); + String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase(Locale.ROOT); if (lowercaseFontName.contains("bold") && lowercaseFontName.contains("italic")) { return "bold, italic"; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/GapsAcrossLinesService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/GapsAcrossLinesService.java index fd1b7f4..94bcce2 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/GapsAcrossLinesService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/GapsAcrossLinesService.java @@ -6,6 +6,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Queue; import java.util.stream.Stream; +import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.knecon.fforesight.service.layoutparser.processor.model.GapInformation; @@ -93,7 +94,7 @@ public class GapsAcrossLinesService { int lineCount = 1; - public GapAcrossLines(Rectangle2D rectangle2D) { + GapAcrossLines(Rectangle2D rectangle2D) { this.rectangle2D = correctRectangle(rectangle2D); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextPositionSequenceSorter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextPositionSequenceSorter.java index 29e2634..7b09e78 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextPositionSequenceSorter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextPositionSequenceSorter.java @@ -28,31 +28,30 @@ public class TextPositionSequenceSorter { List textPositionSequencesPerPage = new LinkedList<>(); try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) { - PDDocument pdDocument = Loader.loadPDF(inputStream); + try (PDDocument pdDocument = Loader.loadPDF(inputStream)) { - for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) { + for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) { - PDFLinesTextStripper stripper = new PDFLinesTextStripper(); - PDPage pdPage = pdDocument.getPage(pageNumber - 1); - stripper.setPageNumber(pageNumber); - stripper.setSortByPosition(true); - stripper.setStartPage(pageNumber); - stripper.setEndPage(pageNumber); - stripper.setPdpage(pdPage); - stripper.getText(pdDocument); + PDFLinesTextStripper stripper = new PDFLinesTextStripper(); + PDPage pdPage = pdDocument.getPage(pageNumber - 1); + stripper.setPageNumber(pageNumber); + stripper.setSortByPosition(true); + stripper.setStartPage(pageNumber); + stripper.setEndPage(pageNumber); + stripper.setPdpage(pdPage); + stripper.getText(pdDocument); - Map> sortedTextPositionSequencesPerDir = stripper.getTextPositionSequences() - .stream() - .collect(Collectors.groupingBy(textPositionSequence -> textPositionSequence.getDir().getDegrees())); + Map> sortedTextPositionSequencesPerDir = stripper.getTextPositionSequences() + .stream() + .collect(Collectors.groupingBy(textPositionSequence -> textPositionSequence.getDir().getDegrees())); - var sortedTextPositionSequences = sortByDirAccordingToPageRotation(sortedTextPositionSequencesPerDir, pdPage.getRotation()); + var sortedTextPositionSequences = sortByDirAccordingToPageRotation(sortedTextPositionSequencesPerDir, pdPage.getRotation()); - textPositionSequencesPerPage.add(new PageContents(sortedTextPositionSequences, - RectangleTransformations.toRectangle2D(pdPage.getCropBox()), - RectangleTransformations.toRectangle2D(pdPage.getMediaBox()))); + textPositionSequencesPerPage.add(new PageContents(sortedTextPositionSequences, + RectangleTransformations.toRectangle2D(pdPage.getCropBox()), + RectangleTransformations.toRectangle2D(pdPage.getMediaBox()))); + } } - - pdDocument.close(); } return textPositionSequencesPerPage; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java index d5bd90d..a141621 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java @@ -39,7 +39,11 @@ public class DocuMineBlockificationService { List chunkWords = new ArrayList<>(); List chunkBlockList1 = new ArrayList<>(); - float minX = 1000, maxX = 0, minY = 1000, maxY = 0; + float minX = 1000; + float maxX = 0; + float minY = 1000; + float maxY = 0; + TextPositionSequence prev = null; boolean wasSplitted = false; diff --git a/layoutparser-service/layoutparser-service-server/build.gradle.kts b/layoutparser-service/layoutparser-service-server/build.gradle.kts new file mode 100644 index 0000000..9e76495 --- /dev/null +++ b/layoutparser-service/layoutparser-service-server/build.gradle.kts @@ -0,0 +1,75 @@ +import org.springframework.boot.gradle.tasks.bundling.BootBuildImage + +plugins { + id("com.knecon.fforesight.java-conventions") + id("org.springframework.boot") version "3.1.2" + id("io.spring.dependency-management") version "1.1.0" + id("org.sonarqube") version "4.2.1.3168" + id("io.freefair.lombok") version "8.1.0" + id ("org.graalvm.buildtools.native") version "0.9.23" +} + +dependencies { + implementation(project(":layoutparser-service-processor")) + + implementation("com.iqser.red.commons:storage-commons:2.1.0") + implementation("com.knecon.fforesight:tenant-commons:0.10.0") + + implementation("org.springframework.boot:spring-boot-starter-actuator:3.1.2") + implementation("com.amazonaws:aws-java-sdk-s3:1.12.514") + + + // for integration testing only + testImplementation(project(":layoutparser-service-internal-api")) + + testImplementation("org.springframework.boot:spring-boot-starter-amqp:3.0.6") + testImplementation("com.iqser.red.service:persistence-service-shared-api-v1:2.36.0") + testImplementation("com.iqser.red.commons:jackson-commons:1.0.0") + testImplementation("com.fasterxml.jackson.module:jackson-module-afterburner:2.15.0-rc2") + testImplementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.15.0-rc2") + testImplementation("org.apache.pdfbox:pdfbox:3.0.0-alpha2") + testImplementation("org.apache.pdfbox:pdfbox-tools:3.0.0-alpha2") + testImplementation("org.springframework.boot:spring-boot-starter-test:3.0.6") + testImplementation("org.apache.logging.log4j:log4j-slf4j-impl:2.19.0") +} + +description = "layoutparser-service-server" + +java { + withJavadocJar() +} + +// AOT seems to be the name of the generated classes for native images +// They are added as a SourceSet, and therefore checkstyle and pmd try to run on it +tasks.named("checkstyleAot") { + enabled = false +} +tasks.named("checkstyleAotTest") { + enabled = false +} +tasks.named("pmdAot") { + enabled = false +} +tasks.named("pmdAotTest") { + enabled = false +} + +tasks.named("bootBuildImage") { + imageName.set("nexus.knecon.com:5001/ff/${project.name}:${project.version}") + if (project.hasProperty("buildbootDockerHostNetwork")) { + network.set("host") + } + docker { + if (project.hasProperty("buildbootDockerHostNetwork")) { + bindHostToBuilder.set(true) + } + verboseLogging.set(true) + + publishRegistry { + username.set(providers.gradleProperty("mavenUser").getOrNull()) + password.set(providers.gradleProperty("mavenPassword").getOrNull()) + email.set(providers.gradleProperty("mavenEmail").getOrNull()) + url.set("https://nexus.knecon.com:5001/") + } + } +} diff --git a/layoutparser-service/layoutparser-service-server/pom.xml b/layoutparser-service/layoutparser-service-server/pom.xml deleted file mode 100644 index d278a4f..0000000 --- a/layoutparser-service/layoutparser-service-server/pom.xml +++ /dev/null @@ -1,84 +0,0 @@ - - - 4.0.0 - - - com.knecon.fforesight - layoutparser-service - 0.1-SNAPSHOT - - - layoutparser-service-server - - - - com.knecon.fforesight - layoutparser-service-processor - ${project.version} - - - org.springframework.boot - spring-boot-starter-amqp - - - org.springframework.cloud - spring-cloud-starter-openfeign - - - org.springframework.boot - spring-boot-starter-actuator - - - org.springframework.boot - spring-boot-starter-test - test - - - org.apache.logging.log4j - log4j-slf4j-impl - test - - - - - - - - - pl.project13.maven - git-commit-id-plugin - - - - revision - - - true - - true - - - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - repackage - - - true - - - - - - - - diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/ApplicationTests.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/ApplicationTests.java index 16489d0..a437284 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/ApplicationTests.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/ApplicationTests.java @@ -6,8 +6,9 @@ import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest; class ApplicationTests extends BaseTest { - @Test - void contextLoads() { - } + @Test + void contextLoads() { + + } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java index 34a5958..b734be0 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java @@ -45,8 +45,9 @@ public class BdrJsonBuildTest extends BaseTest { protected Document buildGraph(File filename) { try (InputStream inputStream = new FileInputStream(filename)) { - PDDocument pdDocument = Loader.loadPDF(inputStream); - return layoutParsingPipeline.parseLayoutWithTimer(LayoutParsingType.TAAS, pdDocument, new ImageServiceResponse(), new TableServiceResponse()); + try (PDDocument pdDocument = Loader.loadPDF(inputStream)) { + return layoutParsingPipeline.parseLayoutWithTimer(LayoutParsingType.TAAS, pdDocument, new ImageServiceResponse(), new TableServiceResponse()); + } } } @@ -94,12 +95,13 @@ public class BdrJsonBuildTest extends BaseTest { private static void visualizeSemanticNodes(File file, File resultingFileName, Document document, TextBlock textBlock) throws IOException { - try (var fileStream = new FileInputStream(file); var outputStream = new FileOutputStream(resultingFileName)) { - PDDocument pdDocument = Loader.loadPDF(fileStream); + try (var fileStream = new FileInputStream(file);// + PDDocument pdDocument = Loader.loadPDF(fileStream);// + var outputStream = new FileOutputStream(resultingFileName)// + ) { PdfDraw.drawDocumentGraph(pdDocument, document); PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build()); pdDocument.save(outputStream); - pdDocument.close(); } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java index 71f9ff5..f0de14e 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java @@ -24,36 +24,34 @@ public class BuildDocumentGraphTest extends BaseTest { @Autowired protected LayoutParsingPipeline layoutParsingPipeline; + @Test @Disabled public void buildMetolachlor() { Document documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06"); assertEquals(221, documentGraph.getPages().size()); - assertEquals(220 , documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count()); - assertEquals(0 , documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count()); + assertEquals(220, documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count()); + assertEquals(0, documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count()); } @SneakyThrows protected Document buildGraph(String filename) { - if (!filename.endsWith(".pdf")) { - filename = filename + ".pdf"; - } - - if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) { + if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) { prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json"); } else { prepareStorage(filename); } ClassPathResource fileResource = new ClassPathResource(filename); - try (InputStream inputStream = fileResource.getInputStream()) { - PDDocument pdDocument = Loader.loadPDF(inputStream); - return layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse()); + try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) { + return layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, + pdDocument, + layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), + new TableServiceResponse()); } } - } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java index 4d258d6..efd8690 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java @@ -46,7 +46,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest { @Disabled public void writeJsonForFileTest() { - var resource = new ClassPathResource("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); + var resource = new ClassPathResource("files/1 Abamectin_prr.pdf"); writeJsons(resource.getFile().toPath()); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java index 63a8c58..a8d0ac5 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java @@ -26,7 +26,7 @@ public class DocumentGraphMappingTest extends BuildDocumentGraphTest { @SneakyThrows public void testGraphMapping() { - String filename = "files/new/crafted document"; + String filename = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"; Document document = buildGraph(filename); DocumentData documentData = DocumentDataMapper.toDocumentData(document); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java index 5d46535..e6f0600 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java @@ -63,12 +63,12 @@ public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest { File tmpFile = File.createTempFile(filename, "SEMANTIC_NODES_BBOX.pdf"); ClassPathResource fileResource = new ClassPathResource(filename + ".pdf"); - try (var fileStream = fileResource.getInputStream()) { - PDDocument pdDocument = Loader.loadPDF(fileStream); - PdfDraw.drawDocumentGraph(pdDocument, documentGraph); - PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build()); - pdDocument.save(tmpFile); - pdDocument.close(); + try (var fileStream = fileResource.getInputStream();// + PDDocument pdDocument = Loader.loadPDF(fileStream)// + ) { + PdfDraw.drawDocumentGraph(pdDocument, documentGraph); + PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build()); + pdDocument.save(tmpFile); } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java index 996407f..aa981b2 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java @@ -16,25 +16,10 @@ import java.util.stream.Collectors; import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.autoconfigure.EnableAutoConfiguration; -import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.boot.test.mock.mockito.MockBean; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.ComponentScan; -import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.FilterType; -import org.springframework.context.annotation.Import; -import org.springframework.context.annotation.Primary; import org.springframework.core.io.ClassPathResource; -import org.springframework.test.context.junit.jupiter.SpringExtension; import com.fasterxml.jackson.databind.ObjectMapper; -import com.iqser.red.storage.commons.StorageAutoConfiguration; -import com.iqser.red.storage.commons.service.StorageService; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.processor.adapter.CvTableParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.adapter.ImageServiceResponseAdapter; @@ -48,15 +33,11 @@ import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePag import com.knecon.fforesight.service.layoutparser.processor.services.PdfParsingService; import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService; -import com.knecon.fforesight.service.layoutparser.server.Application; -import com.knecon.fforesight.service.layoutparser.server.utils.FileSystemBackedStorageService; +import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest; import lombok.SneakyThrows; -@ExtendWith(SpringExtension.class) -@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) -@Import(PdfSegmentationServiceTest.TestConfiguration.class) -public class PdfSegmentationServiceTest { +public class PdfSegmentationServiceTest extends BaseTest { @Autowired private PdfParsingService pdfParsingService; @@ -64,9 +45,6 @@ public class PdfSegmentationServiceTest { @Autowired private ObjectMapper objectMapper; - @MockBean - private RabbitTemplate rabbitTemplate; - @Autowired private RedactManagerClassificationService redactManagerClassificationService; @@ -79,21 +57,6 @@ public class PdfSegmentationServiceTest { @Autowired private SectionsBuilderService sectionsBuilderService; - @Configuration - @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class}) - @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)}) - public static class TestConfiguration { - - @Bean - @Primary - public StorageService inmemoryStorage() { - - return new FileSystemBackedStorageService(); - } - - } - - public ClassificationDocument buildClassificationDocument(PDDocument originDocument) { ClassificationDocument classificationDocument = pdfParsingService.parseDocument(LayoutParsingType.REDACT_MANAGER, diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java index b1bdeaa..795585c 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java @@ -7,15 +7,14 @@ import java.util.Collections; import java.util.List; import java.util.stream.Collectors; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import com.knecon.fforesight.service.layoutparser.processor.model.PageInformation; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.services.InvisibleTableDetectionService; import com.knecon.fforesight.service.layoutparser.processor.services.PageInformationService; -import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; import com.knecon.fforesight.service.layoutparser.processor.services.TextPositionSequenceSorter; +import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw; import lombok.SneakyThrows; @@ -23,7 +22,7 @@ import lombok.SneakyThrows; class InvisibleTableDetectionServiceTest { @Test - @Disabled +// @Disabled @SneakyThrows public void detectInvisibleTableTest() { diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java index e3d7822..9fbfa3e 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java @@ -24,14 +24,13 @@ import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Page; import com.knecon.fforesight.service.layoutparser.processor.graph.textblock.AtomicTextBlock; import com.knecon.fforesight.service.layoutparser.processor.graph.textblock.TextBlock; -import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility; +import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Getter; -import lombok.NoArgsConstructor; import lombok.SneakyThrows; import lombok.experimental.FieldDefaults; import lombok.experimental.UtilityClass; @@ -41,20 +40,17 @@ public class PdfDraw { public static void drawRectanglesPerPage(String filename, List> rectanglesPerPage, String tmpFileName) throws IOException { - try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) { - PDDocument pdDocument = Loader.loadPDF(inputStream); - + try (InputStream inputStream = new ClassPathResource(filename).getInputStream();// + PDDocument pdDocument = Loader.loadPDF(inputStream);// + var out = new FileOutputStream(tmpFileName)// + ) { for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) { PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, rectanglesPerPage.get(pageNumber - 1), PdfVisualisationUtility.Options.builder().stroke(true).build()); } - try (var out = new FileOutputStream(tmpFileName)) { - pdDocument.save(out); - pdDocument.close(); - } - + pdDocument.save(out); } } @@ -62,8 +58,10 @@ public class PdfDraw { public static void drawRectanglesPerPageNumberedByLine(String filename, List>> rectanglesPerPage, String tmpFileName) throws IOException { - try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) { - PDDocument pdDocument = Loader.loadPDF(inputStream); + try (InputStream inputStream = new ClassPathResource(filename).getInputStream();// + PDDocument pdDocument = Loader.loadPDF(inputStream);// + var out = new FileOutputStream(tmpFileName)// + ) { for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) { var rectanglesOnPage = rectanglesPerPage.get(pageNumber - 1); @@ -80,10 +78,8 @@ public class PdfDraw { } } - try (var out = new FileOutputStream(tmpFileName)) { - pdDocument.save(out); - pdDocument.close(); - } + + pdDocument.save(out); } @@ -92,15 +88,18 @@ public class PdfDraw { private static int countNumberOfDigits(int num) { - if (num == 0) { + int final_num = num; + if (final_num == 0) { return 1; } int count = 0; - for (; num != 0; num /= 10, ++count) { + for (; final_num != 0; final_num /= 10) { + count++; } return count; } + public static void drawDocumentGraph(PDDocument document, Document documentGraph) { documentGraph.getDocumentTree().allEntriesInOrder().forEach(entry -> drawNode(document, entry)); @@ -183,10 +182,12 @@ public class PdfDraw { @SneakyThrows - public static void drawRectanglesAndLinesPerPage(String filename, List> list, List> rectanglesPerPage, String tmpFileName) { + public static void drawRectanglesAndLinesPerPage(String filename, List> list, List> rectanglesPerPage, String tmpFileName) { - try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) { - PDDocument pdDocument = Loader.loadPDF(inputStream); + try (InputStream inputStream = new ClassPathResource(filename).getInputStream();// + PDDocument pdDocument = Loader.loadPDF(inputStream);// + var out = new FileOutputStream(tmpFileName)// + ) { for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) { // PdfVisualisationUtility.drawLine2DList(pdDocument, @@ -197,15 +198,9 @@ public class PdfDraw { pageNumber, rectanglesPerPage.get(pageNumber - 1), PdfVisualisationUtility.Options.builder().stroke(true).build()); - PdfVisualisationUtility.drawRectangle2DList(pdDocument, - pageNumber, - list.get(pageNumber - 1), - PdfVisualisationUtility.Options.builder().stroke(true).build()); - } - try (var out = new FileOutputStream(tmpFileName)) { - pdDocument.save(out); - pdDocument.close(); + PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, list.get(pageNumber - 1), PdfVisualisationUtility.Options.builder().stroke(true).build()); } + pdDocument.save(out); } } @@ -213,19 +208,17 @@ public class PdfDraw { @Builder @AllArgsConstructor - @NoArgsConstructor @Getter @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public static class Options { - @Builder.Default - boolean stroke = false; + boolean stroke; @Builder.Default Color strokeColor = Color.BLACK; @Builder.Default float strokeWidth = 1f; - @Builder.Default - boolean fill = false; + + boolean fill; @Builder.Default Color fillColor = Color.BLACK; @@ -250,13 +243,19 @@ public class PdfDraw { private static void drawBBoxAndLabelAndNumberOnPage(PDDocument document, DocumentTree.Entry entry, Options options) { Map rectanglesPerPage = entry.getNode().getBBox(); - rectanglesPerPage.forEach((page, rectangle2D) -> { + for (Page page : rectanglesPerPage.keySet()) { + Rectangle2D rectangle2D = rectanglesPerPage.get(page); if (entry.getType() == NodeType.SECTION) { rectangle2D = RectangleTransformations.pad(rectangle2D, 10, 10); } drawRectangle2DList(document, page.getNumber(), List.of(rectangle2D), options); - drawText(buildString(entry), document, new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2), page.getNumber(), options, entry.getType() == NodeType.TABLE_CELL); - }); + drawText(buildString(entry), + document, + new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2), + page.getNumber(), + options, + entry.getType() == NodeType.TABLE_CELL); + } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/application.yml b/layoutparser-service/layoutparser-service-server/src/test/resources/application.yml index 83c7a1c..18e4690 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/resources/application.yml +++ b/layoutparser-service/layoutparser-service-server/src/test/resources/application.yml @@ -31,7 +31,3 @@ management: prometheus.enabled: ${monitoring.enabled:false} health.enabled: true endpoints.web.exposure.include: prometheus, health - - -storage: - backend: 's3' diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/1 Abamectin_prr.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/1 Abamectin_prr.pdf new file mode 100644 index 0000000..1a10d08 Binary files /dev/null and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/1 Abamectin_prr.pdf differ diff --git a/layoutparser-service/pom.xml b/layoutparser-service/pom.xml deleted file mode 100644 index 7f61c7e..0000000 --- a/layoutparser-service/pom.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - 4.0.0 - - - org.springframework.boot - spring-boot-starter-parent - 3.0.6 - - - - com.knecon.fforesight - layoutparser-service - 0.1-SNAPSHOT - - pom - - - layoutparser-service-processor - layoutparser-service-internal-api - layoutparser-service-server - - - - 17 - 3.0.0-alpha2 - 31.1-jre - 2.15.0-rc2 - 0.10.0 - 2.1.0 - UTF-8 - - - - - org.projectlombok - lombok - 1.18.28 - provided - - - - - - - - org.springframework.cloud - spring-cloud-dependencies - 2022.0.2 - pom - import - - - - - - - - - - org.sonarsource.scanner.maven - sonar-maven-plugin - 3.9.0.2155 - - - org.owasp - dependency-check-maven - 6.3.1 - - ALL - - - - org.jacoco - jacoco-maven-plugin - - - prepare-agent - - prepare-agent - - - - report - - report - - - - - - - - - org.jacoco - jacoco-maven-plugin - 0.8.8 - - - prepare-agent - - prepare-agent - - - - report - - report-aggregate - - verify - - - - - - - - - diff --git a/pom.xml b/pom.xml deleted file mode 100644 index dd49370..0000000 --- a/pom.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - 4.0.0 - - - com.knecon.fforesight - layoutparser - 0.1-SNAPSHOT - - - layoutparser-service - layoutparser-service-image - - - - pom - - diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..696c016 --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,13 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * This project uses @Incubating APIs which are subject to change. + */ + +rootProject.name = "layoutparser" +include(":layoutparser-service-server") +include(":layoutparser-service-processor") +include(":layoutparser-service-internal-api") +project(":layoutparser-service-server").projectDir = file("layoutparser-service/layoutparser-service-server") +project(":layoutparser-service-processor").projectDir = file("layoutparser-service/layoutparser-service-processor") +project(":layoutparser-service-internal-api").projectDir = file("layoutparser-service/layoutparser-service-internal-api")