From 7f675b41cfaec7be80f71f860c42be91498ac795 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Tue, 9 Apr 2024 16:53:57 +0200 Subject: [PATCH] RED-7074: Design Subsection section tree structure algorithm * first draft --- .../processor/LayoutParsingPipeline.java | 101 +++++++++++------- .../model/outline/OutlineObject.java | 25 +++++ .../model/outline/OutlineObjectTree.java | 49 +++++++++ .../model/outline/OutlineObjectTreeNode.java | 37 +++++++ .../services/OutlineExtractorService.java | 70 ++++++++++++ .../BlockificationPostprocessingService.java | 46 ++++++++ .../DocuMineClassificationService.java | 3 + .../RedactManagerClassificationService.java | 3 + .../server/graph/ViewerDocumentTest.java | 7 +- 9 files changed, 300 insertions(+), 41 deletions(-) create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTreeNode.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 
dfa0537..4b8df27 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -28,6 +28,9 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; +import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject; +import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree; +import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTreeNode; import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell; import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; @@ -40,10 +43,12 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.tab import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService; +import com.knecon.fforesight.service.layoutparser.processor.services.OutlineExtractorService; import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService; import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; import 
com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService; import com.knecon.fforesight.service.layoutparser.processor.services.TableExtractionService; +import com.knecon.fforesight.service.layoutparser.processor.services.blockification.BlockificationPostprocessingService; import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocstrumBlockificationService; import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocuMineBlockificationService; import com.knecon.fforesight.service.layoutparser.processor.services.blockification.RedactManagerBlockificationService; @@ -85,11 +90,13 @@ public class LayoutParsingPipeline { TableExtractionService tableExtractionService; DocuMineBlockificationService docuMineBlockificationService; RedactManagerBlockificationService redactManagerBlockificationService; + BlockificationPostprocessingService blockificationPostprocessingService; DocstrumBlockificationService docstrumBlockificationService; LayoutGridService layoutGridService; ObservationRegistry observationRegistry; VisualLayoutParsingAdapter visualLayoutParsingAdapter; ClarifyndClassificationService clarifyndClassificationService; + OutlineExtractorService outlineExtractorService; public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { @@ -98,29 +105,36 @@ public class LayoutParsingPipeline { log.info("Starting layout parsing for {}", layoutParsingRequest.identifier()); File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId()); - File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile); + File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()) + .orElse(originFile); VisualLayoutParsingResponse visualLayoutParsingResponse = new 
VisualLayoutParsingResponse(); - if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { - visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get()); + if (layoutParsingRequest.visualLayoutParsingFileId() + .isPresent()) { + visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId() + .get()); } ImageServiceResponse imageServiceResponse = new ImageServiceResponse(); - if (layoutParsingRequest.imagesFileStorageId().isPresent()) { - imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get()); + if (layoutParsingRequest.imagesFileStorageId() + .isPresent()) { + imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId() + .get()); } TableServiceResponse tableServiceResponse = new TableServiceResponse(); - if (layoutParsingRequest.tablesFileStorageId().isPresent()) { - tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get()); + if (layoutParsingRequest.tablesFileStorageId() + .isPresent()) { + tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId() + .get()); } ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(), - originFile, - imageServiceResponse, - tableServiceResponse, - visualLayoutParsingResponse, - layoutParsingRequest.identifier()); + originFile, + imageServiceResponse, + tableServiceResponse, + visualLayoutParsingResponse, + layoutParsingRequest.identifier()); log.info("Building document graph for {}", layoutParsingRequest.identifier()); @@ -152,25 +166,25 @@ public class LayoutParsingPipeline { .numberOfPages(documentGraph.getNumberOfPages()) .duration(System.currentTimeMillis() - start) .message(format(""" - Layout parsing has 
finished in %.02f s. - identifiers: %s - %s - Files have been saved with Ids: - Structure: %s - Text: %s - Positions: %s - PageData: %s - Simplified Text: %s - Viewer Doc: %s""", - ((float) (System.currentTimeMillis() - start)) / 1000, - layoutParsingRequest.identifier(), - buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()), - layoutParsingRequest.structureFileStorageId(), - layoutParsingRequest.textBlockFileStorageId(), - layoutParsingRequest.positionBlockFileStorageId(), - layoutParsingRequest.pageFileStorageId(), - layoutParsingRequest.simplifiedTextStorageId(), - layoutParsingRequest.viewerDocumentStorageId())) + Layout parsing has finished in %.02f s. + identifiers: %s + %s + Files have been saved with Ids: + Structure: %s + Text: %s + Positions: %s + PageData: %s + Simplified Text: %s + Viewer Doc: %s""", + ((float) (System.currentTimeMillis() - start)) / 1000, + layoutParsingRequest.identifier(), + buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()), + layoutParsingRequest.structureFileStorageId(), + layoutParsingRequest.textBlockFileStorageId(), + layoutParsingRequest.positionBlockFileStorageId(), + layoutParsingRequest.pageFileStorageId(), + layoutParsingRequest.simplifiedTextStorageId(), + layoutParsingRequest.viewerDocumentStorageId())) .build(); } @@ -191,14 +205,14 @@ public class LayoutParsingPipeline { private String buildSemanticNodeCountMessage(int numberOfPages, Map semanticNodeCounts) { return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed", - numberOfPages, - semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION), - semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE), - semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 
0 : semanticNodeCounts.get(NodeType.PARAGRAPH), - semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE), - semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL), - semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER), - semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER)); + numberOfPages, + semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION), + semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE), + semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH), + semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE), + semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL), + semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER), + semanticNodeCounts.get(NodeType.FOOTER) == null ? 
0 : semanticNodeCounts.get(NodeType.FOOTER)); } @@ -213,6 +227,9 @@ public class LayoutParsingPipeline { PDDocument originDocument = openDocument(originFile); addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath())); + + OutlineObjectTree outlineObjectTree = outlineExtractorService.getOutlineObjectTree(originDocument); + Map> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse); Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); Map> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse); @@ -264,6 +281,12 @@ public class LayoutParsingPipeline { case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, false); }; + List outlineObjects = outlineObjectTree.getOutlineObjectsPerPage() + .get(pageNumber - 1); + if(outlineObjects != null) { + blockificationPostprocessingService.sanitizeOutlineBlocks(classificationPage, outlineObjects); + } + classificationPage.setCleanRulings(cleanRulings); classificationPage.setRotation(rotation); classificationPage.setLandscape(isLandscape); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java new file mode 100644 index 0000000..cddb81b --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java @@ -0,0 +1,25 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.outline; + +import java.awt.geom.Point2D; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data 
+@NoArgsConstructor
+@AllArgsConstructor
+public class OutlineObject {
+
+    private String title;
+    private int pageNumber;
+    //private Point2D point;
+    private int treeDepth;
+
+    @Override
+    public String toString() {
+
+        return "OutlineObject{" + "title='" + title + '\'' + '}';
+    }
+
+}
diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java
new file mode 100644
index 0000000..5723cdc
--- /dev/null
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java
@@ -0,0 +1,60 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.outline;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import lombok.Data;
+import lombok.RequiredArgsConstructor;
+
+/**
+ * Outline tree of a document plus a flat index of all its entries grouped by page number.
+ */
+@Data
+@RequiredArgsConstructor
+public class OutlineObjectTree {
+
+    /** Top-level outline entries; nested entries are reachable through each node's children. */
+    private List<OutlineObjectTreeNode> rootNodes = new ArrayList<>();
+
+    /** All entries of the tree (roots and descendants), keyed by the page number stored on the entry. */
+    private Map<Integer, List<OutlineObject>> outlineObjectsPerPage = new HashMap<>();
+
+
+    /**
+     * Builds the tree from its root nodes and fills the per-page index by walking every node.
+     *
+     * @param rootNodes top-level nodes; each node's children are traversed recursively
+     */
+    public OutlineObjectTree(List<OutlineObjectTreeNode> rootNodes) {
+
+        this.rootNodes = rootNodes;
+        flattenNodesAndGroupByPage(rootNodes);
+    }
+
+
+    /**
+     * Adds every node of the given subtrees to {@code outlineObjectsPerPage}, parents before children.
+     */
+    private void flattenNodesAndGroupByPage(List<OutlineObjectTreeNode> outlineObjectTreeNodes) {
+
+        for (OutlineObjectTreeNode node : outlineObjectTreeNodes) {
+            OutlineObject outlineObject = node.getOutlineObject();
+            outlineObjectsPerPage.computeIfAbsent(outlineObject.getPageNumber(), page -> new ArrayList<>())
+                    .add(outlineObject);
+
+            // An empty child list simply ends the recursion, so no emptiness check is needed.
+            flattenNodesAndGroupByPage(node.getChildren());
+        }
+    }
+
+
+    // Explicit override: Lombok's @Data does not generate a toString() when one is already present.
+    @Override
+    public String toString() {
+
+        return super.toString();
+    }
+
+}
diff --git
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTreeNode.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTreeNode.java
new file mode 100644
index 0000000..7753030
--- /dev/null
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTreeNode.java
@@ -0,0 +1,47 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.outline;
+
+import java.awt.geom.Point2D;
+import java.util.ArrayList;
+import java.util.List;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+
+/**
+ * Node of the outline tree: one {@link OutlineObject} and the nodes nested directly below it.
+ */
+@Data
+public class OutlineObjectTreeNode {
+
+    private OutlineObject outlineObject;
+
+    /** Direct children in insertion order; initially empty. */
+    private List<OutlineObjectTreeNode> children = new ArrayList<>();
+
+
+    public OutlineObjectTreeNode(OutlineObject outlineObject) {
+
+        this.outlineObject = outlineObject;
+    }
+
+
+    /**
+     * Appends the given node to this node's children.
+     *
+     * @param outlineObject the child node to append (a tree node, despite the parameter name)
+     */
+    public void addChild(OutlineObjectTreeNode outlineObject) {
+
+        children.add(outlineObject);
+    }
+
+
+    // Prints only this node's own outline object; children are left out.
+    @Override
+    public String toString() {
+
+        return "OutlineObjectTreeNode{" + "outlineObject=" + outlineObject + '}';
+    }
+
+}
diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java
new file mode 100644
index 0000000..b33082e
--- /dev/null
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java
@@ -0,0 +1,70 @@
+package com.knecon.fforesight.service.layoutparser.processor.services;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import
org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
+import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTreeNode;
+
+import lombok.SneakyThrows;
+
+/** Reads the outline of a PDF document into an {@link OutlineObjectTree}. */
+@Service
+public class OutlineExtractorService {
+
+    /**
+     * Builds the outline tree of the given document; top-level entries get depth 1.
+     *
+     * @param document the opened PDF
+     * @return the outline tree; it has no nodes when the document has no outline
+     */
+    @SneakyThrows
+    public OutlineObjectTree getOutlineObjectTree(PDDocument document) {
+
+        List<OutlineObjectTreeNode> rootNodes = new ArrayList<>();
+        PDDocumentOutline documentOutline = document.getDocumentCatalog().getDocumentOutline();
+        // PDFBox returns null for documents without an outline; those yield an empty tree.
+        if (documentOutline != null) {
+            for (PDOutlineItem child : documentOutline.children()) {
+                rootNodes.add(createOutlineObjectWithChildren(child, document, 1));
+            }
+        }
+
+        return new OutlineObjectTree(rootNodes);
+    }
+
+
+    // Converts the item and, recursively, its children; every level down adds one to the depth.
+    @SneakyThrows
+    private OutlineObjectTreeNode createOutlineObjectWithChildren(PDOutlineItem item, PDDocument document, int depth) {
+
+        OutlineObjectTreeNode outlineObject = createOutlineObject(item, document, depth);
+        for (var child : item.children()) {
+            outlineObject.addChild(createOutlineObjectWithChildren(child, document, depth + 1));
+        }
+
+        return outlineObject;
+    }
+
+
+    // Wraps a single outline item; the stored page number is the index of its destination page in the page tree.
+    @SneakyThrows
+    private OutlineObjectTreeNode createOutlineObject(PDOutlineItem item, PDDocument document, int depth) {
+
+        String title = item.getTitle();
+
+        PDPage page = item.findDestinationPage(document);
+        // No resolvable destination page: store -1 instead of passing null to indexOf.
+        int pageNumber = page == null ? -1 : document.getPages().indexOf(page);
+
+        return new OutlineObjectTreeNode(new OutlineObject(title, pageNumber, depth));
+    }
+
+}
diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java
new file mode 100644
index 0000000..27450f7
--- /dev/null
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java
@@ -0,0 +1,55 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.blockification;
+
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
+
+/** Matches the text blocks of a page against outline entries after blockification. */
+@Service
+public class BlockificationPostprocessingService {
+
+    /**
+     * Classifies text blocks whose text equals an outline title as headlines of the entry's tree depth.
+     * Blocks that merely contain a title are handed to {@code splitTextBlock}, which is not implemented yet.
+     *
+     * @param classificationPage page whose text blocks are checked
+     * @param outlineObjects outline entries to match against the page's text blocks
+     */
+    public void sanitizeOutlineBlocks(ClassificationPage classificationPage, List<OutlineObject> outlineObjects) {
+
+        for (AbstractPageBlock textBlock : classificationPage.getTextBlocks()) {
+            // The block text does not depend on the outline entry, so read it once per block.
+            String blockText = textBlock.getText();
+            if (blockText == null) {
+                continue;
+            }
+
+            for (OutlineObject outlineObject : outlineObjects) {
+                String outlineTitle = outlineObject.getTitle();
+                // Untitled entries cannot match; String.contains(null) would throw.
+                if (outlineTitle == null || !blockText.contains(outlineTitle)) {
+                    continue;
+                }
+
+                if (blockText.equals(outlineTitle)) {
+                    textBlock.setClassification(PageBlockType.getHeadlineType(outlineObject.getTreeDepth()));
+                    continue;
+                }
+
+                splitTextBlock(textBlock, outlineTitle, classificationPage);
+            }
+        }
+    }
+
+
+    // Placeholder: splitting a block around a contained title is not implemented yet (empty body).
+    private void splitTextBlock(AbstractPageBlock textBlock, String title, ClassificationPage classificationPage) {
+
+    }
+
+}
diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java index d622fc8..a3cbe19 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java @@ -59,6 +59,9 @@ public class DocuMineClassificationService { Matcher matcher2 = pattern2.matcher(textBlock.toString()); Matcher matcher3 = pattern3.matcher(textBlock.toString()); + if(textBlock.getClassification() != null && textBlock.getClassification().isHeadline()) { + return; + } if (document.getFontSizeCounter().getMostPopular() == null) { textBlock.setClassification(PageBlockType.OTHER); return; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/RedactManagerClassificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/RedactManagerClassificationService.java index 3e90c57..f3ae604 100644 ---
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/RedactManagerClassificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/RedactManagerClassificationService.java @@ -48,6 +48,9 @@ public class RedactManagerClassificationService { var bodyTextFrame = page.getBodyTextFrame(); + if(textBlock.getClassification() != null && textBlock.getClassification().isHeadline()) { + return; + } if (document.getFontSizeCounter().getMostPopular() == null) { textBlock.setClassification(PageBlockType.OTHER); return; diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 580961e..d347062 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -27,7 +27,10 @@ public class ViewerDocumentTest extends BuildDocumentTest { @SneakyThrows public void testViewerDocument() { - String fileName = "files/new/ScrambledTextAfterSorting.pdf"; + //String fileName = "files/new/abschlussarbeiten-template-institut-fur-informatik-padagogische-hochschule-karlsruhe.pdf"; + //String fileName = "files/new/$100m Offers.pdf"; + //String fileName = "files/new/kaust-official-thesis-template.pdf"; + String fileName = "files/new/18-Curacron_ToxicidadeOcularInVitro.pdf"; String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; var documentFile = new ClassPathResource(fileName).getFile(); @@ 
-35,7 +38,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); long start = System.currentTimeMillis(); - Document document = buildGraph(fileName, LayoutParsingType.DOCUMINE); + Document document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER); layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true); System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000); }