ported to gradle

This commit is contained in:
Kilian Schuettler 2023-07-27 12:27:30 +02:00
parent 270129cd73
commit 41267a0f98
46 changed files with 417 additions and 626 deletions

1
.gitignore vendored
View File

@ -18,6 +18,7 @@ target/
.settings
.springBeans
.sts4-cache
.gradle
### IntelliJ IDEA ###
.idea

View File

@ -1,4 +1,4 @@
include:
- project: 'gitlab/gitlab'
ref: 'main'
file: 'ci-templates/maven_java.yml'
file: 'ci-templates/gradle_java.yml'

Binary file not shown.

View File

@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.7/apache-maven-3.8.7-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.1/maven-wrapper-3.1.1.jar

View File

@ -0,0 +1,7 @@
// Build script of the build-logic project that hosts the shared convention plugin
// (presumably buildSrc — TODO confirm the directory, it is not visible here).
// `kotlin-dsl` enables writing that build logic as Kotlin scripts.
plugins {
`kotlin-dsl`
}
// Plugins needed by the build logic itself are resolved from the Gradle Plugin Portal.
repositories {
gradlePluginPortal()
}

View File

@ -0,0 +1,56 @@
// Shared build conventions (presumably the `com.knecon.fforesight.java-conventions`
// plugin applied by the module build scripts — TODO confirm the file name):
// Java 17 source/target level, PMD, Checkstyle, JaCoCo coverage and repositories.
plugins {
    java
    pmd
    checkstyle
    jacoco
}

group = "com.knecon.fforesight"
version = "0.1-SNAPSHOT"

java.sourceCompatibility = JavaVersion.VERSION_17
java.targetCompatibility = JavaVersion.VERSION_17

// Production and test code are checked against different PMD rule sets.
// NOTE(review): `pmd` inside a task block appears to address the project-wide PMD
// extension rather than the task being configured — confirm that pmdMain and
// pmdTest really end up with different rule set files.
tasks.pmdMain {
    pmd.ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
}

tasks.pmdTest {
    pmd.ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml")
}

tasks.test {
    useJUnitPlatform()
    finalizedBy(tasks.jacocoTestReport) // report is always generated after tests run
}

// Single place for the coverage report settings. The script previously configured
// this task twice with contradicting values for the XML report; the later value
// (XML enabled) was the effective one and is the one kept here.
tasks.jacocoTestReport {
    dependsOn(tasks.test) // tests are required to run before generating the report
    reports {
        xml.required.set(true)
        csv.required.set(false)
        html.outputLocation.set(layout.buildDirectory.dir("jacocoHtml"))
    }
}

repositories {
    mavenLocal()
    mavenCentral()
    maven {
        url = uri("https://nexus.knecon.com/repository/gindev/")
        credentials {
            // Read from Gradle properties; null when the property is not set.
            username = providers.gradleProperty("mavenUser").getOrNull()
            password = providers.gradleProperty("mavenPassword").getOrNull()
        }
    }
}

View File

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!-- Checkstyle configuration. Every violation is reported with severity "error". -->
<module name="Checker">
<property
name="severity"
value="error"/>
<!-- Checks nested in TreeWalker are applied to each Java source file. -->
<module name="TreeWalker">
<!-- Listed together with SuppressWarningsFilter below; presumably lets @SuppressWarnings silence checks (see Checkstyle docs). -->
<module name="SuppressWarningsHolder"/>
<!-- Annotation and Javadoc checks -->
<module name="MissingDeprecated"/>
<module name="MissingOverride"/>
<module name="AnnotationLocation"/>
<module name="JavadocStyle"/>
<module name="NonEmptyAtclauseDescription"/>
<!-- Import and modifier checks -->
<module name="IllegalImport"/>
<module name="RedundantImport"/>
<module name="RedundantModifier"/>
<!-- Coding checks -->
<module name="EmptyBlock"/>
<module name="DefaultComesLast"/>
<module name="EmptyStatement"/>
<module name="EqualsHashCode"/>
<module name="ExplicitInitialization"/>
<module name="IllegalInstantiation"/>
<module name="ModifiedControlVariable"/>
<module name="MultipleVariableDeclarations"/>
<module name="PackageDeclaration"/>
<module name="ParameterAssignment"/>
<module name="SimplifyBooleanExpression"/>
<module name="SimplifyBooleanReturn"/>
<module name="StringLiteralEquality"/>
<module name="OneStatementPerLine"/>
<!-- Design and miscellaneous checks -->
<module name="FinalClass"/>
<module name="ArrayTypeStyle"/>
<module name="UpperEll"/>
<module name="OuterTypeFilename"/>
</module>
<!-- Modules configured directly on the Checker, outside the TreeWalker. -->
<module name="FileTabCharacter"/>
<module name="SuppressWarningsFilter"/>
</module>

21
config/pmd/pmd.xml Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0"?>
<!-- PMD rule set for production code; referenced by the pmdMain task configuration. -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon ruleset checks the code for bad stuff
</description>
<!-- Enable the whole Java "errorprone" category except the rules excluded below. -->
<rule ref="category/java/errorprone.xml">
<exclude name="DataflowAnomalyAnalysis"/>
<exclude name="MissingSerialVersionUID"/>
<exclude name="NullAssignment"/>
<exclude name="BeanMembersShouldSerialize"/>
<exclude name="AvoidLiteralsInIfCondition"/>
<exclude name="AvoidDuplicateLiterals"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
</rule>
</ruleset>

23
config/pmd/test_pmd.xml Normal file
View File

@ -0,0 +1,23 @@
<?xml version="1.0"?>
<!-- PMD rule set for test code; referenced by the pmdTest task configuration. -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon test ruleset checks the code for bad stuff
</description>
<!-- Same exclusions as the production rule set (config/pmd/pmd.xml), plus AvoidFieldNameMatchingTypeName. -->
<rule ref="category/java/errorprone.xml">
<exclude name="DataflowAnomalyAnalysis"/>
<exclude name="MissingSerialVersionUID"/>
<exclude name="NullAssignment"/>
<exclude name="BeanMembersShouldSerialize"/>
<exclude name="AvoidLiteralsInIfCondition"/>
<exclude name="AvoidDuplicateLiterals"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
<exclude name="AvoidFieldNameMatchingTypeName"/>
</rule>
</ruleset>

View File

@ -1,99 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>platform-docker-dependency</artifactId>
<version>0.1.0</version>
<relativePath/>
</parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service-image</artifactId>
<version>0.1-SNAPSHOT</version>
<packaging>pom</packaging>
<properties>
<service.server>layoutparser-service-server</service.server>
<platform.jar>${service.server}.jar</platform.jar>
<docker.skip.push>false</docker.skip.push>
<docker.image.prefix>ff</docker.image.prefix>
<docker.image.name>${docker.image.prefix}/${service.server}</docker.image.name>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>download-platform-jar</id>
<phase>prepare-package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>${service.server}</artifactId>
<version>${project.version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<destFileName>${platform.jar}</destFileName>
</dependency>
</artifactItems>
<outputDirectory>${docker.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
<configuration>
<images>
<image>
<name>${docker.image.name}</name>
<build>
<dockerFileDir>${docker.build.directory}</dockerFileDir>
<args>
<PLATFORM_JAR>${platform.jar}</PLATFORM_JAR>
</args>
<tags>
<tag>${docker.image.version}</tag>
<tag>latest</tag>
</tags>
</build>
</image>
</images>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -1,9 +0,0 @@
FROM red/base-image:2.0.2
ARG PLATFORM_JAR
ENV PLATFORM_JAR ${PLATFORM_JAR}
ENV USES_ELASTICSEARCH false
COPY ["${PLATFORM_JAR}", "/"]

View File

@ -0,0 +1,6 @@
// Build script of the layoutparser-service-internal-api module.
plugins {
// Shared Java conventions plugin, referenced by id.
id("com.knecon.fforesight.java-conventions")
// Lombok plugin, pinned to 8.1.0.
id("io.freefair.lombok") version "8.1.0"
}
// No dependencies block: this module declares no dependencies of its own here.
description = "layoutparser-service-internal-api"

View File

@ -1,24 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service</artifactId>
<version>0.1-SNAPSHOT</version>
</parent>
<artifactId>layoutparser-service-internal-api</artifactId>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
</dependencies>
</project>

View File

@ -17,5 +17,4 @@ public class DocumentData {
DocumentPositionData[] documentPositions;
DocumentStructure documentStructure;
}

View File

@ -4,12 +4,14 @@ import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentPage {
int number;

View File

@ -4,12 +4,14 @@ import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentPositionData {
Long id;

View File

@ -8,14 +8,13 @@ import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentStructure {
@ -53,10 +52,11 @@ public class DocumentStructure {
}
@Data
@Builder
@Getter
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@FieldDefaults(level = AccessLevel.PRIVATE)
public static class EntryData {
NodeType type;

View File

@ -6,12 +6,14 @@ import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentTextData {
Long id;
@ -22,6 +24,4 @@ public class DocumentTextData {
int end;
int[] lineBreaks;
}

View File

@ -12,4 +12,5 @@ public class TableData {
List<RowData> rowData;
Integer numberOfCols;
Integer numberOfRows;
}

View File

@ -0,0 +1,21 @@
// Build script of the layoutparser-service-processor module.
plugins {
id("com.knecon.fforesight.java-conventions")
id("io.freefair.lombok") version "8.1.0"
}
// All versions are pinned explicitly in this script.
// NOTE(review): no test dependencies are declared here, although the shared
// conventions enable the JUnit platform — confirm this module has no tests.
dependencies {
// Sibling module of this multi-project build.
implementation(project(":layoutparser-service-internal-api"))
implementation("com.iqser.red.service:persistence-service-shared-api-v1:2.36.0")
implementation("com.knecon.fforesight:tenant-commons:0.10.0")
implementation("com.iqser.red.commons:storage-commons:2.1.0")
// PDFBox is pinned to an alpha release (3.0.0-alpha2).
implementation("org.apache.pdfbox:pdfbox:3.0.0-alpha2")
implementation("org.apache.pdfbox:pdfbox-tools:3.0.0-alpha2")
// Jackson modules are pinned to a release candidate (2.15.0-rc2).
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:2.15.0-rc2")
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.15.0-rc2")
implementation("org.springframework.boot:spring-boot-starter-web:3.0.6")
implementation("org.springframework.boot:spring-boot-starter-amqp:3.0.6")
}
description = "layoutparser-service-processor"

View File

@ -1,71 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service</artifactId>
<version>0.1-SNAPSHOT</version>
</parent>
<artifactId>layoutparser-service-processor</artifactId>
<dependencies>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>persistence-service-shared-api-v1</artifactId>
<version>2.36.0</version>
</dependency>
<dependency>
<groupId>com.knecon.fforesight</groupId>
<artifactId>tenant-commons</artifactId>
<version>${tennat-commons.version}</version>
</dependency>
<dependency>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service-internal-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>storage-commons</artifactId>
<version>${storage-commons.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-afterburner</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jsr310</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-amqp</artifactId>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>RELEASE</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -48,40 +48,40 @@ public class LayoutParsingPipeline {
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
long start = System.currentTimeMillis();
PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.pageFileStorageId());
try (PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId())) {
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.pageFileStorageId());
}
TableServiceResponse tableServiceResponse = new TableServiceResponse();
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.pageFileStorageId());
}
Document documentGraph = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);
int numberOfPages = originDocument.getNumberOfPages();
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) {
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);
}
return LayoutParsingFinishedEvent.builder()
.identifier(layoutParsingRequest.identifier())
.numberOfPages(numberOfPages)
.duration(System.currentTimeMillis() - start)
.message(format("Layout parsing is finished and files have been saved with Ids:\n Structure: %s\nText: %s\nPositions: %s\nPageData: %s",
layoutParsingRequest.structureFileStorageId(),
layoutParsingRequest.textBlockFileStorageId(),
layoutParsingRequest.positionBlockFileStorageId(),
layoutParsingRequest.pageFileStorageId()))
.build();
}
TableServiceResponse tableServiceResponse = new TableServiceResponse();
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.pageFileStorageId());
}
Document documentGraph = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);
int numberOfPages = originDocument.getNumberOfPages();
originDocument.close();
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) {
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);
}
return LayoutParsingFinishedEvent.builder()
.identifier(layoutParsingRequest.identifier())
.numberOfPages(numberOfPages)
.duration(System.currentTimeMillis() - start)
.message(format("Layout parsing is finished and files have been saved with Ids:\n Structure: %s\nText: %s\nPositions: %s\nPageData: %s",
layoutParsingRequest.structureFileStorageId(),
layoutParsingRequest.textBlockFileStorageId(),
layoutParsingRequest.positionBlockFileStorageId(),
layoutParsingRequest.pageFileStorageId()))
.build();
}
@ -115,7 +115,8 @@ public class LayoutParsingPipeline {
long start = System.currentTimeMillis();
ClassificationDocument classificationDocument = pdfParsingService.parseDocument(layoutParsingType, originDocument,
ClassificationDocument classificationDocument = pdfParsingService.parseDocument(layoutParsingType,
originDocument,
cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse),
imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse));

View File

@ -5,6 +5,7 @@ import java.awt.geom.Rectangle2D;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
@ -132,7 +133,7 @@ public class SearchTextWithTextPositionFactory {
private static void addTextPositionWithFontType(RedTextPosition currentTextPosition, String fontType, List<Integer> fontTypePositions, int stringIdx) {
if (currentTextPosition.getFontName().toLowerCase().contains(fontType)) {
if (currentTextPosition.getFontName().toLowerCase(Locale.ROOT).contains(fontType)) {
fontTypePositions.add(stringIdx);
}
}

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.processor.graph.nodes;
import java.util.Locale;
public enum ImageType {
LOGO,
FORMULA,
@ -10,7 +12,7 @@ public enum ImageType {
public static ImageType fromString(String imageType) {
return switch (imageType.toLowerCase()) {
return switch (imageType.toLowerCase(Locale.ROOT)) {
case "logo" -> ImageType.LOGO;
case "formula" -> ImageType.FORMULA;
case "signature" -> ImageType.SIGNATURE;

View File

@ -6,6 +6,7 @@ import java.awt.geom.Rectangle2D;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@ -285,7 +286,7 @@ public interface SemanticNode {
*/
default boolean containsStringIgnoreCase(String string) {
return getTextBlock().getSearchText().toLowerCase().contains(string.toLowerCase());
return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT));
}

View File

@ -5,6 +5,7 @@ import static java.lang.String.format;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.IntStream;
import java.util.stream.Stream;
@ -67,7 +68,7 @@ public class Table implements SemanticNode {
*/
public boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings) {
String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase();
String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
// Lower-case the search strings with the same fixed locale as the row text;
// otherwise the two sides can differ under locale-specific casing rules
// (e.g. a Turkish default locale) and the containment check fails.
return strings.stream().map(searchString -> searchString.toLowerCase(Locale.ROOT)).allMatch(rowText::contains);
}

View File

@ -4,6 +4,7 @@ import java.awt.geom.AffineTransform;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import org.apache.pdfbox.text.TextPosition;
@ -76,7 +77,7 @@ public class TextPositionSequence implements CharSequence {
RedTextPosition textPosition = textPositionAt(index);
String text = textPosition.getUnicode();
return caseInSensitive ? text.toLowerCase().charAt(0) : text.charAt(0);
return caseInSensitive ? text.toLowerCase(Locale.ROOT).charAt(0) : text.charAt(0);
}
@ -223,7 +224,7 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true)
public String getFont() {
return textPositions.get(0).getFontName().toLowerCase().replaceAll(",bold", "").replaceAll(",italic", "");
return textPositions.get(0).getFontName().toLowerCase(Locale.ROOT).replaceAll(",bold", "").replaceAll(",italic", "");
}
@ -231,7 +232,7 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true)
public String getFontStyle() {
String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase();
String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase(Locale.ROOT);
if (lowercaseFontName.contains("bold") && lowercaseFontName.contains("italic")) {
return "bold, italic";

View File

@ -6,6 +6,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.stream.Stream;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.knecon.fforesight.service.layoutparser.processor.model.GapInformation;
@ -93,7 +94,7 @@ public class GapsAcrossLinesService {
int lineCount = 1;
public GapAcrossLines(Rectangle2D rectangle2D) {
GapAcrossLines(Rectangle2D rectangle2D) {
this.rectangle2D = correctRectangle(rectangle2D);
}

View File

@ -28,31 +28,30 @@ public class TextPositionSequenceSorter {
List<PageContents> textPositionSequencesPerPage = new LinkedList<>();
try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(inputStream);
try (PDDocument pdDocument = Loader.loadPDF(inputStream)) {
for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
PDFLinesTextStripper stripper = new PDFLinesTextStripper();
PDPage pdPage = pdDocument.getPage(pageNumber - 1);
stripper.setPageNumber(pageNumber);
stripper.setSortByPosition(true);
stripper.setStartPage(pageNumber);
stripper.setEndPage(pageNumber);
stripper.setPdpage(pdPage);
stripper.getText(pdDocument);
PDFLinesTextStripper stripper = new PDFLinesTextStripper();
PDPage pdPage = pdDocument.getPage(pageNumber - 1);
stripper.setPageNumber(pageNumber);
stripper.setSortByPosition(true);
stripper.setStartPage(pageNumber);
stripper.setEndPage(pageNumber);
stripper.setPdpage(pdPage);
stripper.getText(pdDocument);
Map<Float, List<TextPositionSequence>> sortedTextPositionSequencesPerDir = stripper.getTextPositionSequences()
.stream()
.collect(Collectors.groupingBy(textPositionSequence -> textPositionSequence.getDir().getDegrees()));
Map<Float, List<TextPositionSequence>> sortedTextPositionSequencesPerDir = stripper.getTextPositionSequences()
.stream()
.collect(Collectors.groupingBy(textPositionSequence -> textPositionSequence.getDir().getDegrees()));
var sortedTextPositionSequences = sortByDirAccordingToPageRotation(sortedTextPositionSequencesPerDir, pdPage.getRotation());
var sortedTextPositionSequences = sortByDirAccordingToPageRotation(sortedTextPositionSequencesPerDir, pdPage.getRotation());
textPositionSequencesPerPage.add(new PageContents(sortedTextPositionSequences,
RectangleTransformations.toRectangle2D(pdPage.getCropBox()),
RectangleTransformations.toRectangle2D(pdPage.getMediaBox())));
textPositionSequencesPerPage.add(new PageContents(sortedTextPositionSequences,
RectangleTransformations.toRectangle2D(pdPage.getCropBox()),
RectangleTransformations.toRectangle2D(pdPage.getMediaBox())));
}
}
pdDocument.close();
}
return textPositionSequencesPerPage;

View File

@ -39,7 +39,11 @@ public class DocuMineBlockificationService {
List<TextPositionSequence> chunkWords = new ArrayList<>();
List<AbstractPageBlock> chunkBlockList1 = new ArrayList<>();
float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
float minX = 1000;
float maxX = 0;
float minY = 1000;
float maxY = 0;
TextPositionSequence prev = null;
boolean wasSplitted = false;

View File

@ -0,0 +1,75 @@
import org.springframework.boot.gradle.tasks.bundling.BootBuildImage
// Build script of the layoutparser-service-server module: the Spring Boot
// application, its container image and native-image support.
plugins {
    id("com.knecon.fforesight.java-conventions")
    id("org.springframework.boot") version "3.1.2"
    id("io.spring.dependency-management") version "1.1.0"
    id("org.sonarqube") version "4.2.1.3168"
    id("io.freefair.lombok") version "8.1.0"
    id("org.graalvm.buildtools.native") version "0.9.23"
}

description = "layoutparser-service-server"

// NOTE(review): several Spring Boot test starters are pinned to 3.0.6 while the
// Spring Boot plugin above is 3.1.2 — confirm the mixed versions are intended.
dependencies {
    implementation(project(":layoutparser-service-processor"))
    implementation("com.iqser.red.commons:storage-commons:2.1.0")
    implementation("com.knecon.fforesight:tenant-commons:0.10.0")
    implementation("org.springframework.boot:spring-boot-starter-actuator:3.1.2")
    implementation("com.amazonaws:aws-java-sdk-s3:1.12.514")

    // for integration testing only
    testImplementation(project(":layoutparser-service-internal-api"))
    testImplementation("org.springframework.boot:spring-boot-starter-amqp:3.0.6")
    testImplementation("com.iqser.red.service:persistence-service-shared-api-v1:2.36.0")
    testImplementation("com.iqser.red.commons:jackson-commons:1.0.0")
    testImplementation("com.fasterxml.jackson.module:jackson-module-afterburner:2.15.0-rc2")
    testImplementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.15.0-rc2")
    testImplementation("org.apache.pdfbox:pdfbox:3.0.0-alpha2")
    testImplementation("org.apache.pdfbox:pdfbox-tools:3.0.0-alpha2")
    testImplementation("org.springframework.boot:spring-boot-starter-test:3.0.6")
    testImplementation("org.apache.logging.log4j:log4j-slf4j-impl:2.19.0")
}

java {
    withJavadocJar()
}

// The "aot" / "aotTest" source sets hold classes generated for native images.
// Checkstyle and PMD register tasks for every source set, so the ones that
// would analyse generated code are switched off.
listOf("checkstyleAot", "checkstyleAotTest").forEach { taskName ->
    tasks.named<Checkstyle>(taskName) {
        enabled = false
    }
}
listOf("pmdAot", "pmdAotTest").forEach { taskName ->
    tasks.named<Pmd>(taskName) {
        enabled = false
    }
}

// Builds the container image and configures the registry it is published to.
tasks.named<BootBuildImage>("bootBuildImage") {
    // Opt-in switch: run the image build on the Docker host network.
    val useHostNetwork = project.hasProperty("buildbootDockerHostNetwork")

    imageName.set("nexus.knecon.com:5001/ff/${project.name}:${project.version}")
    if (useHostNetwork) {
        network.set("host")
    }

    docker {
        if (useHostNetwork) {
            bindHostToBuilder.set(true)
        }
        verboseLogging.set(true)

        publishRegistry {
            // Credentials come from Gradle properties; null when a property is not set.
            username.set(providers.gradleProperty("mavenUser").getOrNull())
            password.set(providers.gradleProperty("mavenPassword").getOrNull())
            email.set(providers.gradleProperty("mavenEmail").getOrNull())
            url.set("https://nexus.knecon.com:5001/")
        }
    }
}

View File

@ -1,84 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service</artifactId>
<version>0.1-SNAPSHOT</version>
</parent>
<artifactId>layoutparser-service-server</artifactId>
<dependencies>
<dependency>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service-processor</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-amqp</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<!-- generate git.properties for exposure in /info -->
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>revision</goal>
</goals>
<configuration>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<gitDescribe>
<tags>true</tags>
</gitDescribe>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- repackages the generated jar into a runnable fat-jar and makes it
executable -->
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
<configuration>
<executable>true</executable>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -6,8 +6,9 @@ import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
class ApplicationTests extends BaseTest {
@Test
void contextLoads() {
}
@Test
void contextLoads() {
}
}

View File

@ -45,8 +45,9 @@ public class BdrJsonBuildTest extends BaseTest {
protected Document buildGraph(File filename) {
try (InputStream inputStream = new FileInputStream(filename)) {
PDDocument pdDocument = Loader.loadPDF(inputStream);
return layoutParsingPipeline.parseLayoutWithTimer(LayoutParsingType.TAAS, pdDocument, new ImageServiceResponse(), new TableServiceResponse());
try (PDDocument pdDocument = Loader.loadPDF(inputStream)) {
return layoutParsingPipeline.parseLayoutWithTimer(LayoutParsingType.TAAS, pdDocument, new ImageServiceResponse(), new TableServiceResponse());
}
}
}
@ -94,12 +95,13 @@ public class BdrJsonBuildTest extends BaseTest {
private static void visualizeSemanticNodes(File file, File resultingFileName, Document document, TextBlock textBlock) throws IOException {
try (var fileStream = new FileInputStream(file); var outputStream = new FileOutputStream(resultingFileName)) {
PDDocument pdDocument = Loader.loadPDF(fileStream);
try (var fileStream = new FileInputStream(file);//
PDDocument pdDocument = Loader.loadPDF(fileStream);//
var outputStream = new FileOutputStream(resultingFileName)//
) {
PdfDraw.drawDocumentGraph(pdDocument, document);
PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
pdDocument.save(outputStream);
pdDocument.close();
}
}

View File

@ -24,36 +24,34 @@ public class BuildDocumentGraphTest extends BaseTest {
@Autowired
protected LayoutParsingPipeline layoutParsingPipeline;
@Test
@Disabled
public void buildMetolachlor() {
Document documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
assertEquals(221, documentGraph.getPages().size());
assertEquals(220 , documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count());
assertEquals(0 , documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count());
assertEquals(220, documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count());
assertEquals(0, documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count());
}
@SneakyThrows
protected Document buildGraph(String filename) {
if (!filename.endsWith(".pdf")) {
filename = filename + ".pdf";
}
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
} else {
prepareStorage(filename);
}
ClassPathResource fileResource = new ClassPathResource(filename);
try (InputStream inputStream = fileResource.getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(inputStream);
return layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) {
return layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
pdDocument,
layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
new TableServiceResponse());
}
}
}

View File

@ -46,7 +46,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest {
@Disabled
public void writeJsonForFileTest() {
var resource = new ClassPathResource("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
var resource = new ClassPathResource("files/1 Abamectin_prr.pdf");
writeJsons(resource.getFile().toPath());
}

View File

@ -26,7 +26,7 @@ public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
@SneakyThrows
public void testGraphMapping() {
String filename = "files/new/crafted document";
String filename = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf";
Document document = buildGraph(filename);
DocumentData documentData = DocumentDataMapper.toDocumentData(document);

View File

@ -63,12 +63,12 @@ public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
File tmpFile = File.createTempFile(filename, "SEMANTIC_NODES_BBOX.pdf");
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
try (var fileStream = fileResource.getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(fileStream);
PdfDraw.drawDocumentGraph(pdDocument, documentGraph);
PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
pdDocument.save(tmpFile);
pdDocument.close();
try (var fileStream = fileResource.getInputStream();//
PDDocument pdDocument = Loader.loadPDF(fileStream)//
) {
PdfDraw.drawDocumentGraph(pdDocument, documentGraph);
PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
pdDocument.save(tmpFile);
}
}

View File

@ -16,25 +16,10 @@ import java.util.stream.Collectors;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.adapter.CvTableParsingAdapter;
import com.knecon.fforesight.service.layoutparser.processor.adapter.ImageServiceResponseAdapter;
@ -48,15 +33,11 @@ import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePag
import com.knecon.fforesight.service.layoutparser.processor.services.PdfParsingService;
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
import com.knecon.fforesight.service.layoutparser.server.Application;
import com.knecon.fforesight.service.layoutparser.server.utils.FileSystemBackedStorageService;
import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(PdfSegmentationServiceTest.TestConfiguration.class)
public class PdfSegmentationServiceTest {
public class PdfSegmentationServiceTest extends BaseTest {
@Autowired
private PdfParsingService pdfParsingService;
@ -64,9 +45,6 @@ public class PdfSegmentationServiceTest {
@Autowired
private ObjectMapper objectMapper;
@MockBean
private RabbitTemplate rabbitTemplate;
@Autowired
private RedactManagerClassificationService redactManagerClassificationService;
@ -79,21 +57,6 @@ public class PdfSegmentationServiceTest {
@Autowired
private SectionsBuilderService sectionsBuilderService;
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
public static class TestConfiguration {
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
}
}
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
ClassificationDocument classificationDocument = pdfParsingService.parseDocument(LayoutParsingType.REDACT_MANAGER,

View File

@ -7,15 +7,14 @@ import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.knecon.fforesight.service.layoutparser.processor.model.PageInformation;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import com.knecon.fforesight.service.layoutparser.processor.services.InvisibleTableDetectionService;
import com.knecon.fforesight.service.layoutparser.processor.services.PageInformationService;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.processor.services.TextPositionSequenceSorter;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
import lombok.SneakyThrows;
@ -23,7 +22,7 @@ import lombok.SneakyThrows;
class InvisibleTableDetectionServiceTest {
@Test
@Disabled
// @Disabled
@SneakyThrows
public void detectInvisibleTableTest() {

View File

@ -24,14 +24,13 @@ import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Page;
import com.knecon.fforesight.service.layoutparser.processor.graph.textblock.AtomicTextBlock;
import com.knecon.fforesight.service.layoutparser.processor.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.experimental.UtilityClass;
@ -41,20 +40,17 @@ public class PdfDraw {
public static void drawRectanglesPerPage(String filename, List<List<Rectangle2D>> rectanglesPerPage, String tmpFileName) throws IOException {
try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(inputStream);
try (InputStream inputStream = new ClassPathResource(filename).getInputStream();//
PDDocument pdDocument = Loader.loadPDF(inputStream);//
var out = new FileOutputStream(tmpFileName)//
) {
for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
pageNumber,
rectanglesPerPage.get(pageNumber - 1),
PdfVisualisationUtility.Options.builder().stroke(true).build());
}
try (var out = new FileOutputStream(tmpFileName)) {
pdDocument.save(out);
pdDocument.close();
}
pdDocument.save(out);
}
}
@ -62,8 +58,10 @@ public class PdfDraw {
public static void drawRectanglesPerPageNumberedByLine(String filename, List<List<List<Rectangle2D>>> rectanglesPerPage, String tmpFileName) throws IOException {
try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(inputStream);
try (InputStream inputStream = new ClassPathResource(filename).getInputStream();//
PDDocument pdDocument = Loader.loadPDF(inputStream);//
var out = new FileOutputStream(tmpFileName)//
) {
for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
var rectanglesOnPage = rectanglesPerPage.get(pageNumber - 1);
@ -80,10 +78,8 @@ public class PdfDraw {
}
}
try (var out = new FileOutputStream(tmpFileName)) {
pdDocument.save(out);
pdDocument.close();
}
pdDocument.save(out);
}
@ -92,15 +88,18 @@ public class PdfDraw {
private static int countNumberOfDigits(int num) {
if (num == 0) {
int final_num = num;
if (final_num == 0) {
return 1;
}
int count = 0;
for (; num != 0; num /= 10, ++count) {
for (; final_num != 0; final_num /= 10) {
count++;
}
return count;
}
public static void drawDocumentGraph(PDDocument document, Document documentGraph) {
documentGraph.getDocumentTree().allEntriesInOrder().forEach(entry -> drawNode(document, entry));
@ -183,10 +182,12 @@ public class PdfDraw {
@SneakyThrows
public static void drawRectanglesAndLinesPerPage(String filename, List<List<Rectangle2D>> list, List<List<Rectangle2D>> rectanglesPerPage, String tmpFileName) {
public static void drawRectanglesAndLinesPerPage(String filename, List<List<Rectangle2D>> list, List<List<Rectangle2D>> rectanglesPerPage, String tmpFileName) {
try (InputStream inputStream = new ClassPathResource(filename).getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(inputStream);
try (InputStream inputStream = new ClassPathResource(filename).getInputStream();//
PDDocument pdDocument = Loader.loadPDF(inputStream);//
var out = new FileOutputStream(tmpFileName)//
) {
for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
// PdfVisualisationUtility.drawLine2DList(pdDocument,
@ -197,15 +198,9 @@ public class PdfDraw {
pageNumber,
rectanglesPerPage.get(pageNumber - 1),
PdfVisualisationUtility.Options.builder().stroke(true).build());
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
pageNumber,
list.get(pageNumber - 1),
PdfVisualisationUtility.Options.builder().stroke(true).build());
}
try (var out = new FileOutputStream(tmpFileName)) {
pdDocument.save(out);
pdDocument.close();
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, list.get(pageNumber - 1), PdfVisualisationUtility.Options.builder().stroke(true).build());
}
pdDocument.save(out);
}
}
@ -213,19 +208,17 @@ public class PdfDraw {
@Builder
@AllArgsConstructor
@NoArgsConstructor
@Getter
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public static class Options {
@Builder.Default
boolean stroke = false;
boolean stroke;
@Builder.Default
Color strokeColor = Color.BLACK;
@Builder.Default
float strokeWidth = 1f;
@Builder.Default
boolean fill = false;
boolean fill;
@Builder.Default
Color fillColor = Color.BLACK;
@ -250,13 +243,19 @@ public class PdfDraw {
private static void drawBBoxAndLabelAndNumberOnPage(PDDocument document, DocumentTree.Entry entry, Options options) {
Map<Page, Rectangle2D> rectanglesPerPage = entry.getNode().getBBox();
rectanglesPerPage.forEach((page, rectangle2D) -> {
for (Page page : rectanglesPerPage.keySet()) {
Rectangle2D rectangle2D = rectanglesPerPage.get(page);
if (entry.getType() == NodeType.SECTION) {
rectangle2D = RectangleTransformations.pad(rectangle2D, 10, 10);
}
drawRectangle2DList(document, page.getNumber(), List.of(rectangle2D), options);
drawText(buildString(entry), document, new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2), page.getNumber(), options, entry.getType() == NodeType.TABLE_CELL);
});
drawText(buildString(entry),
document,
new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2),
page.getNumber(),
options,
entry.getType() == NodeType.TABLE_CELL);
}
}

View File

@ -31,7 +31,3 @@ management:
prometheus.enabled: ${monitoring.enabled:false}
health.enabled: true
endpoints.web.exposure.include: prometheus, health
storage:
backend: 's3'

View File

@ -1,120 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.0.6</version>
<relativePath></relativePath>
</parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service</artifactId>
<version>0.1-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>layoutparser-service-processor</module>
<module>layoutparser-service-internal-api</module>
<module>layoutparser-service-server</module>
</modules>
<properties>
<java.version>17</java.version>
<pdfbox.version>3.0.0-alpha2</pdfbox.version>
<guava.version>31.1-jre</guava.version>
<jackson.version>2.15.0-rc2</jackson.version>
<tennat-commons.version>0.10.0</tennat-commons.version>
<storage-commons.version>2.1.0</storage-commons.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.28</version>
<scope>provided</scope>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-dependencies</artifactId>
<version>2022.0.2</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.sonarsource.scanner.maven</groupId>
<artifactId>sonar-maven-plugin</artifactId>
<version>3.9.0.2155</version>
</plugin>
<plugin>
<groupId>org.owasp</groupId>
<artifactId>dependency-check-maven</artifactId>
<version>6.3.1</version>
<configuration>
<format>ALL</format>
</configuration>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.8</version>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<goals>
<goal>report-aggregate</goal>
</goals>
<phase>verify</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

19
pom.xml
View File

@ -1,19 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser</artifactId>
<version>0.1-SNAPSHOT</version>
<modules>
<module>layoutparser-service</module>
<module>layoutparser-service-image</module>
</modules>
<packaging>pom</packaging>
</project>

13
settings.gradle.kts Normal file
View File

@ -0,0 +1,13 @@
/*
* This file was generated by the Gradle 'init' task.
*
* This project uses @Incubating APIs which are subject to change.
*/
// Name of the root project of this multi-project build.
rootProject.name = "layoutparser"
// Subprojects that take part in the build, registered under flat top-level project paths.
include(":layoutparser-service-server")
include(":layoutparser-service-processor")
include(":layoutparser-service-internal-api")
// The flat project paths above do not mirror the on-disk layout: each module
// lives in a sub-directory of layoutparser-service/, so every project
// directory is pointed at its actual location explicitly.
project(":layoutparser-service-server").projectDir = file("layoutparser-service/layoutparser-service-server")
project(":layoutparser-service-processor").projectDir = file("layoutparser-service/layoutparser-service-processor")
project(":layoutparser-service-internal-api").projectDir = file("layoutparser-service/layoutparser-service-internal-api")