CLARI-003: add treeId to StructureObject
This commit is contained in:
parent
7e4baea7e5
commit
e920eb5a78
@ -1,5 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.taas;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
@ -13,6 +15,8 @@ public class StructureObject {
|
||||
|
||||
@Schema(description = "The ID of this StructureObject.")
|
||||
Integer structureObjectNumber;
|
||||
@Schema(description = "The Tree ID of this StructureObject.")
|
||||
List<Integer> treeId;
|
||||
@Schema(description = "This value indicates the start of the string offsets in this Object, with respect to the reading order.")
|
||||
int page;
|
||||
@Schema(description = "This stringOffset indicates the start of the string offsets in this Object, with respect to the reading order of the entire document. It is equal to the previous' StructureObject stringOffset + its length.")
|
||||
|
||||
@ -21,7 +21,7 @@ public class LayoutParsingServiceProcessorConfiguration {
|
||||
public IViewerDocumentService viewerDocumentService(ObservationRegistry registry, LayoutparserSettings settings) {
|
||||
|
||||
if (!Strings.isNullOrEmpty(settings.getPdftronLicense())) {
|
||||
return new PDFTronViewerDocumentService(registry, settings.getPdftronLicense());
|
||||
return new PDFTronViewerDocumentService(registry);
|
||||
} else {
|
||||
return new ViewerDocumentService(registry);
|
||||
}
|
||||
|
||||
@ -99,6 +99,7 @@ public class TaasDocumentDataMapper {
|
||||
Page page = semanticNode.getFirstPage();
|
||||
Rectangle2D bBox = semanticNode.getBBox().get(page);
|
||||
return StructureObject.builder()
|
||||
.treeId(semanticNode.getTreeId())
|
||||
.structureObjectNumber(structureObjectNumber)
|
||||
.boundingBox(toFloatArray(bBox))
|
||||
.stringOffset(semanticNode.getBoundary().start())
|
||||
@ -114,6 +115,7 @@ public class TaasDocumentDataMapper {
|
||||
Page page = table.getFirstPage();
|
||||
Rectangle2D bBox = table.getBBox().get(page);
|
||||
return StructureObject.builder()
|
||||
.treeId(table.getTreeId())
|
||||
.structureObjectNumber(structureObjectNumber)
|
||||
.boundingBox(toFloatArray(bBox))
|
||||
.stringOffset(table.getBoundary().start())
|
||||
|
||||
@ -37,6 +37,7 @@ dependencies {
|
||||
implementation("com.amazonaws:aws-java-sdk-s3:1.12.536")
|
||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
|
||||
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
|
||||
implementation("com.pdftron:PDFNet:10.5.0")
|
||||
|
||||
// for integration testing only
|
||||
testImplementation(project(":viewer-doc-processor"))
|
||||
|
||||
@ -0,0 +1,45 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutparserSettings;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import jakarta.annotation.PreDestroy;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class PDFNetInitializer {
|
||||
|
||||
private final LayoutparserSettings settings;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@PostConstruct
|
||||
// Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError.
|
||||
public void init() {
|
||||
|
||||
if (Strings.isNullOrEmpty(settings.getPdftronLicense())) {
|
||||
return;
|
||||
}
|
||||
log.info("Initializing Native Libraries");
|
||||
log.info("Setting pdftron license: {}", settings.getPdftronLicense());
|
||||
PDFNet.setTempPath("/tmp/pdftron");
|
||||
PDFNet.initialize(settings.getPdftronLicense());
|
||||
|
||||
}
|
||||
|
||||
|
||||
@PreDestroy
|
||||
public void terminate() {
|
||||
|
||||
PDFNet.terminate();
|
||||
}
|
||||
|
||||
}
|
||||
@ -26,7 +26,6 @@ import com.pdftron.pdf.ElementReader;
|
||||
import com.pdftron.pdf.ElementWriter;
|
||||
import com.pdftron.pdf.Font;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.PageIterator;
|
||||
import com.pdftron.pdf.ocg.Group;
|
||||
@ -43,7 +42,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class PDFTronViewerDocumentService implements IViewerDocumentService {
|
||||
|
||||
private final ObservationRegistry registry;
|
||||
private final String pdftronLicense;
|
||||
|
||||
|
||||
@Override
|
||||
@ -51,7 +49,6 @@ public class PDFTronViewerDocumentService implements IViewerDocumentService {
|
||||
@SneakyThrows
|
||||
public synchronized void addVisualizationsOnPage(File originFile, File destinationFile, List<Visualizations> visualizations) {
|
||||
|
||||
PDFNet.initialize(pdftronLicense);
|
||||
|
||||
// originFile and destinationFile might be the same, so we use a temp file.
|
||||
// Otherwise, saving the document might corrupt the file
|
||||
@ -108,7 +105,6 @@ public class PDFTronViewerDocumentService implements IViewerDocumentService {
|
||||
saveDocument(pdfDoc, destinationFile);
|
||||
}
|
||||
|
||||
PDFNet.terminate();
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user