RED-9353: refactor PDFTronViewerDocumentService
* rename RedactionEntity -> TextEntity
* rename Boundary -> TextRange
This commit is contained in:
parent
ad38618dc7
commit
203436c62f
@ -8,7 +8,6 @@ import org.springframework.stereotype.Service;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Headline;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Key;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutGrid;
|
||||
@ -57,7 +56,6 @@ public class LayoutGridService {
|
||||
case TABLE -> layoutGrid.addTable((Table) semanticNode);
|
||||
case IMAGE -> layoutGrid.addImage((Image) semanticNode);
|
||||
case HEADER, FOOTER -> layoutGrid.addHeaderOrFooter(semanticNode);
|
||||
case KEY -> layoutGrid.addKey((Key) semanticNode);
|
||||
}
|
||||
});
|
||||
return layoutGrid;
|
||||
|
||||
@ -19,7 +19,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.No
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Headline;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Key;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||
@ -70,28 +69,6 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
}
|
||||
|
||||
|
||||
public void addKey(Key key) {
|
||||
|
||||
addAsRectangle(key, keyValue, KEY_COLOR);
|
||||
if (key.getValue() != null) {
|
||||
addAsRectangle(key.getValue(), keyValue, VALUE_COLOR);
|
||||
|
||||
if (key.getFirstPage() != key.getValue().getFirstPage()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Line2D line = ConnectionLineUtil.findClosestMidpointLine(key.getBBox().get(key.getFirstPage()), key.getValue().getBBox().get(key.getFirstPage()));
|
||||
|
||||
Line2D[] arrowHead = ConnectionLineUtil.createArrowHead(line, Math.min(ConnectionLineUtil.length(line), 5));
|
||||
List<ColoredLine> linesOnPage = getOrCreateVisualizationsOnPage(key.getFirstPage().getNumber(), keyValue).getColoredLines();
|
||||
linesOnPage.add(new ColoredLine(line, KEY_VALUE_BBOX_COLOR, LINE_WIDTH));
|
||||
linesOnPage.add(new ColoredLine(arrowHead[0], KEY_VALUE_BBOX_COLOR, LINE_WIDTH));
|
||||
linesOnPage.add(new ColoredLine(arrowHead[1], KEY_VALUE_BBOX_COLOR, LINE_WIDTH));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void addHeadline(Headline headline) {
|
||||
|
||||
addAsRectangle(headline, headlines, HEADLINE_COLOR);
|
||||
|
||||
@ -38,7 +38,7 @@ dependencies {
|
||||
implementation("com.amazonaws:aws-java-sdk-s3:1.12.536")
|
||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
|
||||
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
|
||||
implementation("com.pdftron:PDFNet:10.5.0")
|
||||
implementation("com.pdftron:PDFNet:10.7.0")
|
||||
|
||||
// for integration testing only
|
||||
testImplementation(project(":viewer-doc-processor"))
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
@ -17,7 +18,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class PDFNetInitializer {
|
||||
|
||||
private final LayoutparserSettings settings;
|
||||
@Value("${pdftron.license:}")
|
||||
private String pdftronLicense;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@ -25,13 +27,13 @@ public class PDFNetInitializer {
|
||||
// Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError.
|
||||
public void init() {
|
||||
|
||||
if (Strings.isNullOrEmpty(settings.getPdftronLicense())) {
|
||||
if (Strings.isNullOrEmpty(pdftronLicense)) {
|
||||
return;
|
||||
}
|
||||
log.info("Initializing Native Libraries");
|
||||
log.info("Setting pdftron license: {}", settings.getPdftronLicense());
|
||||
log.info("Setting pdftron license: {}", pdftronLicense);
|
||||
PDFNet.setTempPath("/tmp/pdftron");
|
||||
PDFNet.initialize(settings.getPdftronLicense());
|
||||
PDFNet.initialize(pdftronLicense);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ import java.util.Map;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
@ -34,6 +35,7 @@ import com.knecon.fforesight.service.layoutparser.processor.services.visualizati
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
||||
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class OutlineDetectionTest extends AbstractTest {
|
||||
@ -44,6 +46,16 @@ public class OutlineDetectionTest extends AbstractTest {
|
||||
@Autowired
|
||||
protected LayoutParsingPipeline layoutParsingPipeline;
|
||||
|
||||
@Autowired
|
||||
PDFNetInitializer pdfNetInitializer;
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void init() {
|
||||
|
||||
pdfNetInitializer.init();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
@ -60,28 +72,17 @@ public class OutlineDetectionTest extends AbstractTest {
|
||||
|
||||
OutlineObjectTree outlineObjectTree = classificationDocument.getOutlineObjectTree();
|
||||
assertEquals(outlineObjectTree.getRootNodes().size(), 8);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(1).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(3).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(4).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(5).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(6).size(), 2);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(7).size(), 3);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(8).size(), 2);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(10).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(11).size(), 4);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(12).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage()
|
||||
.get(13).size(), 2);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(1).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(3).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(4).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(5).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(6).size(), 2);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(7).size(), 3);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(8).size(), 2);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(10).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(11).size(), 4);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(12).size(), 1);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(13).size(), 2);
|
||||
assertTrue(outlineObjectTree.getOutlineObjectsPerPage().values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
@ -98,29 +99,15 @@ public class OutlineDetectionTest extends AbstractTest {
|
||||
.stream()
|
||||
.map(outlineObjectTreeNode -> sanitizeString(outlineObjectTreeNode.getOutlineObject().getTitle()))
|
||||
.toList());
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(5).getChildren().size(), 6);
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(7).getChildren().size(), 3);
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(8).getChildren().size(), 3);
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(8).getChildren()
|
||||
.get(2).getChildren().size(), 1);
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(8).getChildren()
|
||||
.get(2).getChildren()
|
||||
.get(0).getChildren().size(), 3);
|
||||
assertEquals(tableOfContents.getMainSections().get(5).getChildren().size(), 6);
|
||||
assertEquals(tableOfContents.getMainSections().get(7).getChildren().size(), 3);
|
||||
assertEquals(tableOfContents.getMainSections().get(8).getChildren().size(), 3);
|
||||
assertEquals(tableOfContents.getMainSections().get(8).getChildren().get(2).getChildren().size(), 1);
|
||||
assertEquals(tableOfContents.getMainSections().get(8).getChildren().get(2).getChildren().get(0).getChildren().size(), 3);
|
||||
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(0).getImages().size(), 1);
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(6).getImages().size(), 1);
|
||||
assertEquals(tableOfContents.getMainSections()
|
||||
.get(8).getChildren()
|
||||
.get(2).getChildren()
|
||||
.get(0).getChildren()
|
||||
.get(2).getImages().size(), 1);
|
||||
assertEquals(tableOfContents.getMainSections().get(0).getImages().size(), 1);
|
||||
assertEquals(tableOfContents.getMainSections().get(6).getImages().size(), 1);
|
||||
assertEquals(tableOfContents.getMainSections().get(8).getChildren().get(2).getChildren().get(0).getChildren().get(2).getImages().size(), 1);
|
||||
|
||||
Document document = buildGraph(fileName, classificationDocument);
|
||||
|
||||
@ -159,17 +146,14 @@ public class OutlineDetectionTest extends AbstractTest {
|
||||
.count(), 3 + 1);
|
||||
assertEquals(childrenOfTypeSectionOrSuperSection.get(8).streamChildren()
|
||||
.filter(isSectionOrSuperSection)
|
||||
.toList()
|
||||
.get(3).streamChildren()
|
||||
.toList().get(3).streamChildren()
|
||||
.filter(isSectionOrSuperSection)
|
||||
.count(), 1 + 1);
|
||||
assertEquals(childrenOfTypeSectionOrSuperSection.get(8).streamChildren()
|
||||
.filter(isSectionOrSuperSection)
|
||||
.toList()
|
||||
.get(3).streamChildren()
|
||||
.toList().get(3).streamChildren()
|
||||
.filter(isSectionOrSuperSection)
|
||||
.toList()
|
||||
.get(1).streamChildren()
|
||||
.toList().get(1).streamChildren()
|
||||
.filter(isSectionOrSuperSection)
|
||||
.count(), 3 + 1);
|
||||
|
||||
|
||||
@ -4,11 +4,18 @@ import java.io.File;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Map;
|
||||
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
@ -16,17 +23,30 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.tab
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
|
||||
import com.knecon.fforesight.service.layoutparser.server.PDFNetInitializer;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
|
||||
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
|
||||
@Autowired
|
||||
PDFNetInitializer pdfNetInitializer;
|
||||
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void init() {
|
||||
|
||||
pdfNetInitializer.init();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
@ -9,7 +9,5 @@ storage:
|
||||
key: minioadmin
|
||||
secret: minioadmin
|
||||
|
||||
layoutparser:
|
||||
debug: true
|
||||
pdftron-license: demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a
|
||||
|
||||
|
||||
@ -28,6 +28,11 @@ spring:
|
||||
max-interval: 15000
|
||||
prefetch: 1
|
||||
|
||||
layoutparser:
|
||||
debug: true
|
||||
|
||||
pdftron.license: demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a
|
||||
|
||||
management:
|
||||
endpoint:
|
||||
metrics.enabled: ${monitoring.enabled:false}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user