From e920eb5a788d64811288f6ff8cbe605e983a9454 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 1 Jul 2024 13:56:16 +0200 Subject: [PATCH] CLARI-003: add treeId to StructureObject --- .../api/data/taas/StructureObject.java | 4 ++ ...tParsingServiceProcessorConfiguration.java | 2 +- .../mapper/TaasDocumentDataMapper.java | 2 + .../build.gradle.kts | 1 + .../server/PDFNetInitializer.java | 45 +++++++++++++++++++ .../pdftron/PDFTronViewerDocumentService.java | 4 -- 6 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 layoutparser-service/layoutparser-service-server/src/main/java/com/knecon/fforesight/service/layoutparser/server/PDFNetInitializer.java diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java index 74eb470..dd83fde 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/taas/StructureObject.java @@ -1,5 +1,7 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.taas; +import java.util.List; + import io.swagger.v3.oas.annotations.media.Schema; import lombok.AllArgsConstructor; import lombok.Builder; @@ -13,6 +15,8 @@ public class StructureObject { @Schema(description = "The ID of this StructureObject.") Integer structureObjectNumber; + @Schema(description = "The Tree ID of this StructureObject.") + List treeId; @Schema(description = "This value indicates the start of the string offsets in this Object, with respect to the reading order.") int page; @Schema(description = "This stringOffset indicates the 
start of the string offsets in this Object, with respect to the reading order of the entire document. It is equal to the previous' StructureObject stringOffset + its length.") diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingServiceProcessorConfiguration.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingServiceProcessorConfiguration.java index 00e80f0..7ba862f 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingServiceProcessorConfiguration.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingServiceProcessorConfiguration.java @@ -21,7 +21,7 @@ public class LayoutParsingServiceProcessorConfiguration { public IViewerDocumentService viewerDocumentService(ObservationRegistry registry, LayoutparserSettings settings) { if (!Strings.isNullOrEmpty(settings.getPdftronLicense())) { - return new PDFTronViewerDocumentService(registry, settings.getPdftronLicense()); + return new PDFTronViewerDocumentService(registry); } else { return new ViewerDocumentService(registry); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/TaasDocumentDataMapper.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/TaasDocumentDataMapper.java index 1978503..cee038c 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/TaasDocumentDataMapper.java +++ 
b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/TaasDocumentDataMapper.java @@ -99,6 +99,7 @@ public class TaasDocumentDataMapper { Page page = semanticNode.getFirstPage(); Rectangle2D bBox = semanticNode.getBBox().get(page); return StructureObject.builder() + .treeId(semanticNode.getTreeId()) .structureObjectNumber(structureObjectNumber) .boundingBox(toFloatArray(bBox)) .stringOffset(semanticNode.getBoundary().start()) @@ -114,6 +115,7 @@ public class TaasDocumentDataMapper { Page page = table.getFirstPage(); Rectangle2D bBox = table.getBBox().get(page); return StructureObject.builder() + .treeId(table.getTreeId()) .structureObjectNumber(structureObjectNumber) .boundingBox(toFloatArray(bBox)) .stringOffset(table.getBoundary().start()) diff --git a/layoutparser-service/layoutparser-service-server/build.gradle.kts b/layoutparser-service/layoutparser-service-server/build.gradle.kts index cc20ea7..1f6983e 100644 --- a/layoutparser-service/layoutparser-service-server/build.gradle.kts +++ b/layoutparser-service/layoutparser-service-server/build.gradle.kts @@ -37,6 +37,7 @@ dependencies { implementation("com.amazonaws:aws-java-sdk-s3:1.12.536") implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4") implementation("net.logstash.logback:logstash-logback-encoder:7.4") + implementation("com.pdftron:PDFNet:10.5.0") // for integration testing only testImplementation(project(":viewer-doc-processor")) diff --git a/layoutparser-service/layoutparser-service-server/src/main/java/com/knecon/fforesight/service/layoutparser/server/PDFNetInitializer.java b/layoutparser-service/layoutparser-service-server/src/main/java/com/knecon/fforesight/service/layoutparser/server/PDFNetInitializer.java new file mode 100644 index 0000000..604b094 --- /dev/null +++ 
b/layoutparser-service/layoutparser-service-server/src/main/java/com/knecon/fforesight/service/layoutparser/server/PDFNetInitializer.java
@@ -0,0 +1,65 @@
+package com.knecon.fforesight.service.layoutparser.server;
+
+import org.springframework.stereotype.Component;
+
+import com.google.common.base.Strings;
+import com.knecon.fforesight.service.layoutparser.processor.LayoutparserSettings;
+import com.pdftron.pdf.PDFNet;
+
+import jakarta.annotation.PostConstruct;
+import jakarta.annotation.PreDestroy;
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Initializes the native PDFNet library when the bean is constructed and terminates it when the bean is destroyed.
+ * PDFNet is left untouched when no PDFTron license is configured.
+ */
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class PDFNetInitializer {
+
+    private final LayoutparserSettings settings;
+
+    // Set once PDFNet.initialize returned normally, so that terminate() never touches a library this bean did not initialize.
+    private volatile boolean initialized;
+
+
+    /**
+     * Sets the PDFNet temp path and initializes PDFNet with the configured license.
+     * Returns without doing anything if the license is null or empty.
+     */
+    @SneakyThrows
+    @PostConstruct
+    // Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError.
+    public void init() {
+
+        if (Strings.isNullOrEmpty(settings.getPdftronLicense())) {
+            return;
+        }
+        log.info("Initializing Native Libraries");
+        // The license key is a credential, so only the fact that it is being set is logged, never its value.
+        log.info("Setting pdftron license");
+        PDFNet.setTempPath("/tmp/pdftron");
+        PDFNet.initialize(settings.getPdftronLicense());
+        initialized = true;
+
+    }
+
+
+    /**
+     * Terminates PDFNet if, and only if, {@link #init()} initialized it.
+     */
+    @PreDestroy
+    public void terminate() {
+
+        if (!initialized) {
+            return;
+        }
+        PDFNet.terminate();
+        initialized = false;
+    }
+
+}
diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/pdftron/PDFTronViewerDocumentService.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/pdftron/PDFTronViewerDocumentService.java
index 424b02c..dcbb69a 100644
--- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/pdftron/PDFTronViewerDocumentService.java
+++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/pdftron/PDFTronViewerDocumentService.java
@@ -26,7
+26,6 @@ import com.pdftron.pdf.ElementReader; import com.pdftron.pdf.ElementWriter; import com.pdftron.pdf.Font; import com.pdftron.pdf.PDFDoc; -import com.pdftron.pdf.PDFNet; import com.pdftron.pdf.Page; import com.pdftron.pdf.PageIterator; import com.pdftron.pdf.ocg.Group; @@ -43,7 +42,6 @@ import lombok.extern.slf4j.Slf4j; public class PDFTronViewerDocumentService implements IViewerDocumentService { private final ObservationRegistry registry; - private final String pdftronLicense; @Override @@ -51,7 +49,6 @@ public class PDFTronViewerDocumentService implements IViewerDocumentService { @SneakyThrows public synchronized void addVisualizationsOnPage(File originFile, File destinationFile, List visualizations) { - PDFNet.initialize(pdftronLicense); // originFile and destinationFile might be the same, so we use a temp file. // Otherwise, saving the document might corrupt the file @@ -108,7 +105,6 @@ public class PDFTronViewerDocumentService implements IViewerDocumentService { saveDocument(pdfDoc, destinationFile); } - PDFNet.terminate(); }