Merge branch 'CLARI-003' into 'main'

CLARI-003: add treeId to StructureObject

See merge request fforesight/layout-parser!176
This commit is contained in:
Kilian Schüttler 2024-07-02 11:37:22 +02:00
commit 3173610be5
6 changed files with 53 additions and 5 deletions

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.taas;
import java.util.List;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AllArgsConstructor;
import lombok.Builder;
@ -13,6 +15,8 @@ public class StructureObject {
@Schema(description = "The ID of this StructureObject.")
Integer structureObjectNumber;
@Schema(description = "The Tree ID of this StructureObject.")
List<Integer> treeId;
@Schema(description = "This value indicates the start of the string offsets in this Object, with respect to the reading order.")
int page;
@Schema(description = "This stringOffset indicates the start of the string offsets in this Object, with respect to the reading order of the entire document. It is equal to the previous' StructureObject stringOffset + its length.")

View File

@ -21,7 +21,7 @@ public class LayoutParsingServiceProcessorConfiguration {
public IViewerDocumentService viewerDocumentService(ObservationRegistry registry, LayoutparserSettings settings) {
if (!Strings.isNullOrEmpty(settings.getPdftronLicense())) {
return new PDFTronViewerDocumentService(registry, settings.getPdftronLicense());
return new PDFTronViewerDocumentService(registry);
} else {
return new ViewerDocumentService(registry);
}

View File

@ -99,6 +99,7 @@ public class TaasDocumentDataMapper {
Page page = semanticNode.getFirstPage();
Rectangle2D bBox = semanticNode.getBBox().get(page);
return StructureObject.builder()
.treeId(semanticNode.getTreeId())
.structureObjectNumber(structureObjectNumber)
.boundingBox(toFloatArray(bBox))
.stringOffset(semanticNode.getBoundary().start())
@ -114,6 +115,7 @@ public class TaasDocumentDataMapper {
Page page = table.getFirstPage();
Rectangle2D bBox = table.getBBox().get(page);
return StructureObject.builder()
.treeId(table.getTreeId())
.structureObjectNumber(structureObjectNumber)
.boundingBox(toFloatArray(bBox))
.stringOffset(table.getBoundary().start())

View File

@ -37,6 +37,7 @@ dependencies {
implementation("com.amazonaws:aws-java-sdk-s3:1.12.536")
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
implementation("com.pdftron:PDFNet:10.5.0")
// for integration testing only
testImplementation(project(":viewer-doc-processor"))

View File

@ -0,0 +1,45 @@
package com.knecon.fforesight.service.layoutparser.server;
import org.springframework.stereotype.Component;
import com.google.common.base.Strings;
import com.knecon.fforesight.service.layoutparser.processor.LayoutparserSettings;
import com.pdftron.pdf.PDFNet;
import jakarta.annotation.PostConstruct;
import jakarta.annotation.PreDestroy;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
/**
 * Initializes the PDFTron native library (PDFNet) when this bean is constructed and terminates it
 * again when the bean is destroyed.
 *
 * <p>If no PDFTron license is configured in {@link LayoutparserSettings}, PDFNet is neither
 * initialized nor terminated.
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class PDFNetInitializer {

    private final LayoutparserSettings settings;

    // Set once PDFNet.initialize(...) has returned; guards terminate() against running without a prior init.
    // Not final, so Lombok's @RequiredArgsConstructor does not add it to the constructor.
    private volatile boolean initialized;


    /**
     * Initializes PDFNet with the configured license, using {@code /tmp/pdftron} as its temp path.
     *
     * <p>Do not change this back to an application runner: with an application runner, messages are
     * taken from the queue before PDFNet is initialized, which leads to an UnsatisfiedLinkError.
     */
    @SneakyThrows
    @PostConstruct
    public void init() {

        if (Strings.isNullOrEmpty(settings.getPdftronLicense())) {
            return;
        }
        log.info("Initializing Native Libraries");
        // The license key is a secret and must never be written to the log.
        log.info("Setting pdftron license");
        PDFNet.setTempPath("/tmp/pdftron");
        PDFNet.initialize(settings.getPdftronLicense());
        initialized = true;
    }


    /**
     * Terminates PDFNet, but only if {@link #init()} actually initialized it.
     */
    @PreDestroy
    public void terminate() {

        if (!initialized) {
            return;
        }
        PDFNet.terminate();
        initialized = false;
    }

}

View File

@ -26,7 +26,6 @@ import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.Font;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.PDFNet;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator;
import com.pdftron.pdf.ocg.Group;
@ -43,7 +42,6 @@ import lombok.extern.slf4j.Slf4j;
public class PDFTronViewerDocumentService implements IViewerDocumentService {
private final ObservationRegistry registry;
private final String pdftronLicense;
@Override
@ -51,7 +49,6 @@ public class PDFTronViewerDocumentService implements IViewerDocumentService {
@SneakyThrows
public synchronized void addVisualizationsOnPage(File originFile, File destinationFile, List<Visualizations> visualizations) {
PDFNet.initialize(pdftronLicense);
// originFile and destinationFile might be the same, so we use a temp file.
// Otherwise, saving the document might corrupt the file
@ -108,7 +105,6 @@ public class PDFTronViewerDocumentService implements IViewerDocumentService {
saveDocument(pdfDoc, destinationFile);
}
PDFNet.terminate();
}