Patch notes (free text ahead of the first "diff --git" header).
Summary:
* LayoutParsingPipeline now calls sanitizeOutlineBlocksWithKdTree instead of sanitizeOutlineBlocks.
* OutlineExtractorService always creates a tree node per outline item; the position is read from XYZ, FitH/FitBH, FitV/FitBV and FitR destinations and falls back to (0, 0) when it cannot be resolved. A missing name tree or unknown named destination yields an empty position instead of a NullPointerException, so the GoTo-action fallback still runs.
* BlockificationPostprocessingService marks a text block whose text equals the outline title as a headline; otherwise it merges the combination of candidate blocks whose whitespace-stripped texts concatenate to the title and whose average distance to the outline point is smallest.
* DocstrumBlockificationService.buildTextBlock becomes public static; SearchTextWithTextPositionFactory.mapRedTextPositionToInitialUserSpace becomes public.
Open review points:
* mergeBlocks discards the TextPageBlock returned by buildTextBlock; confirm the merged block's cached text and geometry are refreshed elsewhere.
* mapRedTextPositionToInitialUserSpace is invoked through the class name; confirm it is static (for example via a utility-class annotation that is not visible in this diff).
diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 4b8df27..3699af5 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -284,7 +284,7 @@ public class LayoutParsingPipeline { List outlineObjects = outlineObjectTree.getOutlineObjectsPerPage() .get(pageNumber - 1); if(outlineObjects != null) { - blockificationPostprocessingService.sanitizeOutlineBlocks(classificationPage, outlineObjects); + blockificationPostprocessingService.sanitizeOutlineBlocksWithKdTree(classificationPage, outlineObjects); } classificationPage.setCleanRulings(cleanRulings); @@ -304,7 +304,7 @@ public class LayoutParsingPipeline { } if (signatures.containsKey(pageNumber)) { - if (classificationPage.getImages() == null || classificationPage.getImages().size() == 0) { + if (classificationPage.getImages() == null || classificationPage.getImages().isEmpty()) { classificationPage.setImages(signatures.get(pageNumber)); } else { classificationPage.getImages().addAll(signatures.get(pageNumber)); @@ -337,6 +337,8 @@ public class LayoutParsingPipeline { case CLARIFYND -> clarifyndClassificationService.classifyDocument(classificationDocument); } + // compute ToC + log.info("Building Sections for {}", identifier); switch (layoutParsingType) { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java 
b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java index 0dbe471..314e384 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/OutlineExtractorService.java @@ -11,7 +11,6 @@ import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdmodel.PDDestinationNameTreeNode; import org.apache.pdfbox.pdmodel.PDDocument; @@ -19,6 +18,10 @@ import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.interactive.action.PDAction; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitHeightDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitRectangleDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitWidthDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageXYZDestination; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; import org.springframework.stereotype.Service; @@ -34,6 +37,16 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class OutlineExtractorService { + private static final String PDDESTINATION_TYPE_FIT = "Fit"; + private static final String 
PDDESTINATION_TYPE_FIT_B = "FitB"; + private static final String PDDESTINATION_TYPE_FIT_H = "FitH"; + private static final String PDDESTINATION_TYPE_FIT_V = "FitV"; + private static final String PDDESTINATION_TYPE_FIT_R = "FitR"; + private static final String PDDESTINATION_TYPE_FIT_BH = "FitBH"; + private static final String PDDESTINATION_TYPE_FIT_BV = "FitBV"; + private static final String PDDESTINATION_TYPE_XYZ = "XYZ"; + + @SneakyThrows public OutlineObjectTree getOutlineObjectTree(PDDocument document) { @@ -41,8 +54,7 @@ public class OutlineExtractorService { List rootNodes = new ArrayList<>(); for (PDOutlineItem child : documentOutline.children()) { - Optional outlineObject = createOutlineObjectWithChildren(child, document, 1); - outlineObject.ifPresent(rootNodes::add); + rootNodes.add(createOutlineObjectWithChildren(child, document, 1)); } return new OutlineObjectTree(rootNodes); @@ -50,85 +62,81 @@ public class OutlineExtractorService { @SneakyThrows - private Optional createOutlineObjectWithChildren(PDOutlineItem item, PDDocument document, int depth) { + private OutlineObjectTreeNode createOutlineObjectWithChildren(PDOutlineItem item, PDDocument document, int depth) { - Optional outlineObject = createOutlineObject(item, document, depth); - if (outlineObject.isPresent()) { - for (var child : item.children()) { - Optional outlineObjectWithChildren = createOutlineObjectWithChildren(child, document, depth + 1); - outlineObjectWithChildren.ifPresent(outlineObjectTreeNode -> outlineObject.get().addChild(outlineObjectTreeNode)); - } + OutlineObjectTreeNode outlineObject = createOutlineObject(item, document, depth); + for (var child : item.children()) { + OutlineObjectTreeNode outlineObjectWithChildren = createOutlineObjectWithChildren(child, document, depth + 1); + outlineObject.addChild(outlineObjectWithChildren); } return outlineObject; } - private Optional createOutlineObject(PDOutlineItem item, PDDocument document, int depth) { + @SneakyThrows + private 
OutlineObjectTreeNode createOutlineObject(PDOutlineItem item, PDDocument document, int depth) { + + String title = item.getTitle(); + + PDPage page = item.findDestinationPage(document); + int pageNumber = document.getPages().indexOf(page); + + Optional outlinePosition = Optional.empty(); try { - String title = item.getTitle(); - - PDPage page = item.findDestinationPage(document); - int pageNumber = document.getPages().indexOf(page); - - PDDocumentNameDictionary names = document.getDocumentCatalog().getNames(); PDDestinationNameTreeNode destinations = null; if (names != null) { destinations = names.getDests(); } - Optional outlinePosition = Optional.empty(); - PDDestination destination = item.getDestination(); if (destination != null) { - outlinePosition = getLocationFromCOSBase(page, destinations, destination.getCOSObject()); + outlinePosition = getLocationFromCOSBase(destinations, destination.getCOSObject()); } if (outlinePosition.isEmpty()) { PDAction action = item.getAction(); - COSDictionary cosDictionary = null; if (action != null) { - cosDictionary = action.getCOSObject(); + outlinePosition = extractOutlineLocationGoTo(destinations, action.getCOSObject()); } - outlinePosition = extractOutlineLocationGoTo(page, cosDictionary, destinations); } - return outlinePosition.map(position -> new OutlineObjectTreeNode(new OutlineObject(title, pageNumber, position, depth))); } catch (Exception e) { - - log.info("Could not find outline item in document with title: " + item.getTitle()); - return Optional.empty(); + log.info("Error occurred during position resolution for outline item on page {} with title {}", pageNumber, title, e); } + + return new OutlineObjectTreeNode(new OutlineObject(title, pageNumber, outlinePosition.orElse(new Point2D.Float(0, 0)), depth)); + } @SneakyThrows - private static Optional extractOutlineLocationGoTo(PDPage page, COSDictionary cosDictionary, PDDestinationNameTreeNode destinations) { + private static Optional 
extractOutlineLocationGoTo(PDDestinationNameTreeNode destinations, COSDictionary cosDictionary) { if (isGoToAction(cosDictionary)) { COSBase cosBase = cosDictionary.getItem(COSName.D); - return getLocationFromCOSBase(page, destinations, cosBase); + return getLocationFromCOSBase(destinations, cosBase); } return Optional.empty(); } - private static Optional getLocationFromCOSBase(PDPage page, PDDestinationNameTreeNode destinations, COSBase cosBase) throws IOException { + private static Optional getLocationFromCOSBase(PDDestinationNameTreeNode destinations, COSBase cosBase) throws IOException { if (cosBase != null) { if (cosBase instanceof COSArray cosArray) { - return getLocationFromCosArrayWithXYZCoordinates(cosArray, page); + return getLocationFromCosArray(cosArray); } - if (cosBase instanceof COSString cosString) { + if (cosBase instanceof COSString cosString && destinations != null) { String destinationName = cosString.getString(); - COSArray cosArray = destinations.getValue(destinationName).getCOSObject(); - return getLocationFromCosArrayWithXYZCoordinates(cosArray, page); + var namedDestination = destinations.getValue(destinationName); + return namedDestination == null ? Optional.empty() : getLocationFromCosArray(namedDestination.getCOSObject()); } } @@ -136,20 +144,54 @@ public class OutlineExtractorService { } - private static Optional getLocationFromCosArrayWithXYZCoordinates(COSArray cosArray, PDPage page) { + private static Optional getLocationFromCosArray(COSArray cosArray) { - if (isXYZDestination(cosArray)) { - float x = ((COSNumber) cosArray.get(2)).floatValue(); - float y = Math.abs(((COSNumber) cosArray.get(3)).floatValue() - page.getMediaBox().getHeight()); - return Optional.of(new Point2D.Float(x, y)); + boolean located = false; + float x = 0; + float y = 0; + + try { + + PDDestination destination = PDDestination.create(cosArray); + COSName type = (COSName) cosArray.getObject(1); + String typeString = type.getName(); + + switch (typeString) { + case 
PDDESTINATION_TYPE_FIT_V: + case PDDESTINATION_TYPE_FIT_BV: + PDPageFitHeightDestination fitHeightDestination = (PDPageFitHeightDestination) destination; + x = fitHeightDestination.getLeft(); + located = 
true; + break; + case PDDESTINATION_TYPE_FIT_R: + PDPageFitRectangleDestination fitRectangleDestination = (PDPageFitRectangleDestination) destination; + x = fitRectangleDestination.getLeft(); + y = fitRectangleDestination.getTop(); + located = true; + break; + case PDDESTINATION_TYPE_FIT_H: + case PDDESTINATION_TYPE_FIT_BH: + PDPageFitWidthDestination fitWidthDestination = (PDPageFitWidthDestination) destination; + y = fitWidthDestination.getTop(); + located = true; + break; + case PDDESTINATION_TYPE_XYZ: + PDPageXYZDestination xyzDestination = (PDPageXYZDestination) destination; + x = xyzDestination.getLeft(); + y = xyzDestination.getTop(); + located = true; + break; + case PDDESTINATION_TYPE_FIT: + case PDDESTINATION_TYPE_FIT_B: + default: + } + + } catch (IOException e) { + throw new RuntimeException(e); } - return Optional.empty(); - } + return located ? Optional.of(new Point2D.Float(x, y)) : Optional.empty(); - private static boolean isXYZDestination(COSArray cosArray) { - - return cosArray != null && cosArray.getName(1).equals("XYZ"); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java index b3f288c..5d1a1d8 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java @@ -1,57 +1,298 @@ package com.knecon.fforesight.service.layoutparser.processor.services.blockification; +import static 
com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocstrumBlockificationService.buildTextBlock; + +import java.awt.geom.Rectangle2D; +import java.util.ArrayList; +import java.util.Comparator; import java.util.List; +import java.util.function.Function; import org.springframework.stereotype.Service; +import org.tinspin.index.Index; +import org.tinspin.index.kdtree.KDIterator; +import org.tinspin.index.kdtree.KDTree; -import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; +import com.knecon.fforesight.service.layoutparser.processor.docstrum.utils.DoubleUtils; import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage; import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType; import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; -import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; +import com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory; +import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; @Service public class BlockificationPostprocessingService { - public static final float BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD = 8.0f; + private static final float BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD = 5.0f; + private static final double BLOCK_COMPARISON_PRECISION = 1.0; + + private static final Function blockToBoundingBox = (abstractPageBlock) -> abstractPageBlock.getSequences() + .stream() + .map(textPositionSequence -> textPositionSequence.getTextPositions() + .stream() + .map(tp -> SearchTextWithTextPositionFactory.mapRedTextPositionToInitialUserSpace(tp, textPositionSequence)) + .collect(RectangleTransformations.collectBBox())) + .collect(RectangleTransformations.collectBBox()); public void 
sanitizeOutlineBlocks(ClassificationPage classificationPage, List outlineObjects) { - for (AbstractPageBlock textBlock : classificationPage.getTextBlocks()) { - for (OutlineObject outlineObject : outlineObjects) { + List textBlocks = new ArrayList<>(classificationPage.getTextBlocks() + .stream() + .filter(block -> block instanceof TextPageBlock) + .toList() + .stream() + .map(block -> (TextPageBlock) block) + .toList()); - String blockText = textBlock.getText(); - String outlineTitle = outlineObject.getTitle(); + textBlocks.sort(Comparator.comparing(TextPageBlock::getMinY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, BLOCK_COMPARISON_PRECISION)) + .thenComparing(TextPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, BLOCK_COMPARISON_PRECISION))); - boolean blockTextContainsOutlineTitle = blockText.contains(outlineTitle); - boolean outlineTitleContainsBlockText = outlineTitle.contains(blockText); - if (!isBlockCloseToOutline(textBlock, outlineObject) || !blockTextContainsOutlineTitle && !outlineTitleContainsBlockText) { - continue; - } - - if (blockText.equals(outlineTitle)) { - - textBlock.setClassification(PageBlockType.getHeadlineType(outlineObject.getTreeDepth())); - continue; - } - - if(blockTextContainsOutlineTitle) { - splitTextBlock(textBlock, outlineTitle, classificationPage); - } - - if(outlineTitleContainsBlockText) { - // find other blocks, merge them into current, mark them for deletion after loop + for (OutlineObject outlineObject : outlineObjects) { + + boolean matchedExactly = false; + List splitCandidates = new ArrayList<>(); + List mergeCandidates = new ArrayList<>(); + + for (TextPageBlock textPageBlock : textBlocks) { + matchedExactly = processOutlineObjectForTextBlock(textPageBlock, outlineObject, splitCandidates, mergeCandidates); + + if (matchedExactly) { + break; } + } + if (!matchedExactly) { + //selectMatch(outlineObject, kdTree, splitCandidates, mergeCandidates); } } } - private boolean isBlockCloseToOutline(AbstractPageBlock 
textBlock, 
OutlineObject outlineObject) { + public void sanitizeOutlineBlocksWithKdTree(ClassificationPage classificationPage, List outlineObjects) { + + List textBlocks = classificationPage.getTextBlocks() + .stream() + .filter(block -> block instanceof TextPageBlock) + .toList() + .stream() + .map(block -> (TextPageBlock) block) + .toList(); + if (textBlocks.isEmpty() || outlineObjects.isEmpty()) { + return; + } + + KDTree kdTree = KDTree.create(2); + textBlocks.forEach(block -> { + var boundingBox = blockToBoundingBox.apply(block); + kdTree.insert(new double[]{boundingBox.getMinX(), boundingBox.getMaxY()}, block); + }); + + for (OutlineObject outlineObject : outlineObjects) { + + KDIterator successorIterator = kdTree.query(new double[]{ // + 0, // + outlineObject.getPoint().getY() - BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD // + }, // + new double[]{Double.MAX_VALUE, Double.MAX_VALUE}); + + boolean matchedExactly = false; + List splitCandidates = new ArrayList<>(); + List mergeCandidates = new ArrayList<>(); + + while (successorIterator.hasNext() && !matchedExactly) { + TextPageBlock pageBlock = successorIterator.next().value(); + matchedExactly = processOutlineObjectForTextBlock(pageBlock, outlineObject, splitCandidates, mergeCandidates); + } + + if (!matchedExactly) { + selectMatch(classificationPage, outlineObject, kdTree, splitCandidates, mergeCandidates); + } + } + } + + + private void selectMatch(ClassificationPage classificationPage, + OutlineObject outlineObject, + KDTree kdTree, + List splitCandidates, + List mergeCandidates) { + + // Split candidates are collected by the caller but not acted on yet: + // splitTextBlock is still an empty stub, so only the merge candidates + // are resolved below. + + if (!mergeCandidates.isEmpty()) { + + List allMergeCandidates = new ArrayList<>(mergeCandidates); + addNeighborsOfCandidate(kdTree, mergeCandidates.get(0), allMergeCandidates); + addNeighborsOfCandidate(kdTree, mergeCandidates.get(mergeCandidates.size() - 1), 
allMergeCandidates); + allMergeCandidates = allMergeCandidates.stream() + .distinct() 
+ .toList(); + + List> combinations = findCombinations(outlineObject.getTitle(), allMergeCandidates); + double maxDistance = Double.MAX_VALUE; + List bestCombination = new ArrayList<>(); + for (List combination : combinations) { + double averageDistance = combination.stream() + .map(block -> calculateDistance(outlineObject, block)) + .mapToDouble(Double::doubleValue).average() + .orElse(Double.MAX_VALUE); + if (maxDistance > averageDistance) { + maxDistance = averageDistance; + bestCombination = combination; + } + } + mergeBlocks(classificationPage, bestCombination); + } + } + + + private void mergeBlocks(ClassificationPage classificationPage, List blocksToMerge) { + + if (blocksToMerge.size() <= 1) { + return; + } + + TextPageBlock firstBlock = blocksToMerge.get(0); + + List mergedBlocks = new ArrayList<>(); + for (TextPageBlock textPageBlock : blocksToMerge.subList(1, blocksToMerge.size())) { + + if (firstBlock != null && !firstBlock.getSequences().isEmpty()) { + + if (textPageBlock.getDir() == firstBlock.getDir()) { + firstBlock.getSequences().addAll(textPageBlock.getSequences()); + mergedBlocks.add(textPageBlock); + } + } + } + + assert firstBlock != null; + buildTextBlock(firstBlock.getSequences(), 0); + firstBlock.setToDuplicate(false); + classificationPage.getTextBlocks().removeAll(mergedBlocks); + + } + + + private static List> findCombinations(String title, List blocks) { + + List> combinations = new ArrayList<>(); + findCombinations(title, blocks, new ArrayList<>(), combinations); + return combinations; + } + + + private static void findCombinations(String title, List blocks, List current, List> combinations) { + + String target = title.replaceAll("\\s", ""); + if (target.isEmpty()) { + combinations.add(new ArrayList<>(current)); + return; + } + + List remaining = blocks.stream() + .filter(block -> !current.contains(block)) + .toList(); + for (TextPageBlock block : remaining) { + String prefix = block.getText().replaceAll("\\s", ""); + if 
(target.startsWith(prefix)) { + current.add(block); + findCombinations(target.substring(prefix.length()), blocks, current, combinations); + current.remove(current.size() - 1); + } + } + } + + + private double calculateDistance(OutlineObject outlineObject, TextPageBlock pageBlock) { + + double deltaX = outlineObject.getPoint().getX() - pageBlock.getMinX(); + double deltaY = outlineObject.getPoint().getY() - pageBlock.getMinY(); + return Math.sqrt(deltaX * deltaX + deltaY * deltaY); + } + + + private static void addNeighborsOfCandidate(KDTree kdTree, TextPageBlock mergeCandidate, List allMergeCandidates) { + + var boundingBox = blockToBoundingBox.apply(mergeCandidate); + Index.PointIteratorKnn knnIterator = kdTree.queryKnn(new double[]{boundingBox.getMinX(), boundingBox.getMaxY()}, 4); + knnIterator.forEachRemaining(neighbor -> allMergeCandidates.add(neighbor.value())); + } + + + private boolean processOutlineObjectForTextBlock(TextPageBlock pageBlock, + OutlineObject outlineObject, + List splitCandidates, + List mergeCandidates) { + + String blockText = pageBlock.getText(); + String outlineTitle = outlineObject.getTitle(); + + boolean blockTextContainsOutlineTitle = blockText.contains(outlineTitle); + boolean outlineTitleContainsBlockText = outlineTitle.contains(blockText); + + if (!blockTextContainsOutlineTitle && !outlineTitleContainsBlockText) { + return false; + } + + if (blockText.equals(outlineTitle)) { + pageBlock.setClassification(PageBlockType.getHeadlineType(outlineObject.getTreeDepth())); + return true; + } + + if (blockTextContainsOutlineTitle) { + splitCandidates.add(pageBlock); + } + + if (outlineTitleContainsBlockText) { + mergeCandidates.add(pageBlock); + } + return false; + } + + + private void processOutlineObjectForTextBlockOld(ClassificationPage classificationPage, TextPageBlock pageBlock, OutlineObject outlineObject) { + + String blockText = pageBlock.getText(); + String outlineTitle = outlineObject.getTitle(); + + boolean 
blockTextContainsOutlineTitle = blockText.contains(outlineTitle); + boolean outlineTitleContainsBlockText = outlineTitle.contains(blockText); + + Rectangle2D boundingBox = pageBlock.getSequences() + .stream() + .map(textPositionSequence -> textPositionSequence.getTextPositions() + .stream() + .map(tp -> SearchTextWithTextPositionFactory.mapRedTextPositionToInitialUserSpace(tp, textPositionSequence)) + .collect(RectangleTransformations.collectBBox())) + .collect(RectangleTransformations.collectBBox()); + + if (!isCloseToOutline(boundingBox, outlineObject) || !blockTextContainsOutlineTitle && !outlineTitleContainsBlockText) { + return; + } + + if (blockText.equals(outlineTitle)) { + + pageBlock.setClassification(PageBlockType.getHeadlineType(outlineObject.getTreeDepth())); + return; + } + + if (blockTextContainsOutlineTitle) { + splitTextBlock(pageBlock, outlineTitle, classificationPage); + } + + if (outlineTitleContainsBlockText) { + // find other blocks, merge them into current, mark them for deletion after loop + } + } + + + private boolean isCloseToOutline(Rectangle2D boundingBox, OutlineObject outlineObject) { float threshold = BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD; //if (textBlock instanceof TextPageBlock) { @@ -65,11 +306,11 @@ public class BlockificationPostprocessingService { // } //} - return textBlock.getMinY() - outlineObject.getPoint().getY() < threshold && textBlock.getMinX() - outlineObject.getPoint().getX() < threshold; + return boundingBox.getMinY() - outlineObject.getPoint().getY() < threshold && boundingBox.getMinX() - outlineObject.getPoint().getX() < threshold; } - private void splitTextBlock(AbstractPageBlock textBlock, String title, ClassificationPage classificationPage) { + private void splitTextBlock(TextPageBlock pageBlock, String title, ClassificationPage classificationPage) { } diff --git 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java index c3666a6..453c772 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java @@ -328,7 +328,7 @@ public class DocstrumBlockificationService { } - private TextPageBlock buildTextBlock(List wordBlockList, int indexOnPage) { + public static TextPageBlock buildTextBlock(List wordBlockList, int indexOnPage) { TextPageBlock textBlock = null; @@ -430,7 +430,7 @@ public class DocstrumBlockificationService { } - private double round(float value, int decimalPoints) { + private static double round(float value, int decimalPoints) { var d = Math.pow(10, decimalPoints); return Math.round(value * d) / d; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java index 7ebc737..7cff0a2 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java +++ 
b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java @@ -177,7 +177,7 @@ public class SearchTextWithTextPositionFactory { } - private Rectangle2D mapRedTextPositionToInitialUserSpace(RedTextPosition textPosition, TextPositionSequence sequence) { + public Rectangle2D mapRedTextPositionToInitialUserSpace(RedTextPosition textPosition, TextPositionSequence sequence) { float textHeight = sequence.getTextHeight() + HEIGHT_PADDING; Rectangle2D rectangle2D = new Rectangle2D.Double(textPosition.getXDirAdj(), diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 9b7bed7..bd5b7f2 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -2,6 +2,8 @@ package com.knecon.fforesight.service.layoutparser.server.graph; import java.io.File; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import org.junit.jupiter.api.Disabled; @@ -31,6 +33,8 @@ public class ViewerDocumentTest extends BuildDocumentTest { //String fileName = "files/new/kaust-official-thesis-template.pdf"; //String fileName = "files/new/$100m Offers.pdf"; String fileName = "files/new/18-Curacron_ToxicidadeOcularInVitro.pdf"; + //String fileName = "files/new/mistitled_outlines_example.pdf"; + //String fileName = "files/bdr/Plenarprotokoll 1 (keine Druchsache!) 
(1) 1.pdf"; String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; var documentFile = new ClassPathResource(fileName).getFile(); @@ -69,6 +73,5 @@ public class ViewerDocumentTest extends BuildDocumentTest { layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true); } - }