Merge branch 'experimental_features' into 'master'

DM-305: port rules to new schema

See merge request redactmanager/redaction-service!44
This commit is contained in:
Kilian Schüttler 2023-07-12 13:44:43 +02:00
commit 36fcc88671
14 changed files with 478 additions and 16 deletions

View File

@ -26,6 +26,8 @@ public abstract class AbstractPageBlock {
@JsonIgnore
protected int page;
int columnIndex;
@JsonIgnore
private Orientation orientation = Orientation.NONE;
@ -77,4 +79,10 @@ public abstract class AbstractPageBlock {
return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY();
}
/**
 * Tells whether this block and the given block overlap along the x-axis.
 * Blocks whose edges merely touch are treated as overlapping, because both comparisons are inclusive.
 *
 * @param atc the block to compare against
 * @return {@code true} if the horizontal extents of both blocks overlap or touch
 */
public boolean intersectsX(AbstractPageBlock atc) {
    boolean overlapsHorizontally = false;
    // Only look at the second edge once the first comparison succeeded (same evaluation order as a plain &&).
    if (this.minX <= atc.getMaxX()) {
        overlapsHorizontally = this.maxX >= atc.getMinX();
    }
    return overlapsHorizontally;
}
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.awt.geom.Rectangle2D;
import lombok.AllArgsConstructor;
/**
 * Plain data holder for one column: an index, a {@link ColumnType} and a bounding box.
 * Lombok's {@code @AllArgsConstructor} generates a constructor taking the three fields in declaration order.
 * All fields are package-private and mutable.
 */
@AllArgsConstructor
public class Column {
// NOTE(review): presumably the position of this column among the columns of a page — confirm against callers.
int index;
// NOTE(review): presumably records how the column was detected — confirm against callers.
ColumnType columnType;
// Bounding box of the column. NOTE(review): the coordinate system is not visible here — confirm.
Rectangle2D bBox;
}

View File

@ -0,0 +1,6 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
/**
 * Kinds of column that can be recorded on a {@code Column}.
 * NOTE(review): the meaning of the constants is not established in this file; the readings below are
 * guesses from the names only — confirm before relying on them.
 */
public enum ColumnType {
// presumably: column boundary derived from ruling lines — TODO confirm
RULING,
// presumably: column boundary derived from the distance (whitespace) between text — TODO confirm
DISTANCE
}

View File

@ -0,0 +1,149 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.service;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
import lombok.experimental.UtilityClass;
@UtilityClass
public class ColumnDetectionService {
private static final double SPLITTABLE_LINE_PERCENTAGE_THRESHOLD = 0.6;
private static final int MAX_NUMBER_OF_COLUMNS = 4;
public List<Rectangle2D> detectColumns(List<TextPositionSequence> textPositionSequences, Rectangle2D mainBodyTextFrame) {
if (textPositionSequences.size() < 2) {
return List.of(mainBodyTextFrame);
}
List<List<Rectangle2D>> linesWithGaps = LineDetectionService.findLinesWithGaps(textPositionSequences);
Map<Integer, List<Integer>> linesWithMatchingGapIndices = new HashMap<>();
for (int numberOfColumns = 2; numberOfColumns <= MAX_NUMBER_OF_COLUMNS; numberOfColumns++) {
linesWithMatchingGapIndices.put(numberOfColumns, findConsecutiveLinesWithMatchingGaps(linesWithGaps, mainBodyTextFrame.getWidth(), numberOfColumns));
}
int optimalNumberOfColumns = findOptimalNumberOfColumns(linesWithMatchingGapIndices, linesWithGaps.size());
if (optimalNumberOfColumns == 1) {
return List.of(mainBodyTextFrame);
}
return buildColumns(mainBodyTextFrame, getLinesWithMatchingGaps(linesWithMatchingGapIndices.get(optimalNumberOfColumns), linesWithGaps), optimalNumberOfColumns);
}
private static List<Integer> findConsecutiveLinesWithMatchingGaps(List<List<Rectangle2D>> linesWithGaps, double width, int numberOfColumns) {
List<Boolean> booleans = lineHasMatchingGap(linesWithGaps, width, numberOfColumns);
return findConsecutiveTrueIndicesWithMaxLengthRun(booleans);
}
private List<Boolean> lineHasMatchingGap(List<List<Rectangle2D>> linesWithGaps, double width, int numberOfColumns) {
return linesWithGaps.stream()
.map(blocksWithGaps -> IntStream.range(1, numberOfColumns)
.allMatch(columnIndex -> noBlocksIntersectX(blocksWithGaps, calculateGapLocation(width, numberOfColumns, columnIndex))))
.toList();
}
private List<Integer> findConsecutiveTrueIndicesWithMaxLengthRun(List<Boolean> booleans) {
List<Integer> maxConsecutiveTrueIndices = new LinkedList<>();
List<Integer> currentConsecutiveTrueIndices = new LinkedList<>();
for (int i = 0; i < booleans.size(); i++) {
if (!booleans.get(i)) {
if (currentConsecutiveTrueIndices.isEmpty()) {
continue;
}
if (currentConsecutiveTrueIndices.size() > maxConsecutiveTrueIndices.size()) {
maxConsecutiveTrueIndices = currentConsecutiveTrueIndices;
}
currentConsecutiveTrueIndices = new LinkedList<>();
continue;
}
currentConsecutiveTrueIndices.add(i);
}
if (currentConsecutiveTrueIndices.size() > maxConsecutiveTrueIndices.size()) {
return currentConsecutiveTrueIndices;
}
return maxConsecutiveTrueIndices;
}
private static int findOptimalNumberOfColumns(Map<Integer, List<Integer>> linesWithMatchingGapIndices, Integer numberOfLines) {
return linesWithMatchingGapIndices.entrySet()
.stream()
.max(comparePercentages(numberOfLines))
.filter(entry -> percentageIsAboveThreshold(entry, numberOfLines))
.map(Map.Entry::getKey)
.orElse(1);
}
private List<Rectangle2D> buildColumns(Rectangle2D mainBodyTextFrame, List<Rectangle2D> rectanglesToMerge, int optimalColumnCount) {
if (optimalColumnCount == 1 || rectanglesToMerge.isEmpty()) {
return List.of(mainBodyTextFrame);
}
double maxY = rectanglesToMerge.get(0).getMaxY();
double minY = rectanglesToMerge.get(rectanglesToMerge.size() - 1).getMinY();
List<Rectangle2D> columns = new LinkedList<>();
double width = mainBodyTextFrame.getWidth() / optimalColumnCount;
double height = maxY - minY;
for (int i = 0; i < optimalColumnCount; i++) {
columns.add(new Rectangle2D.Double(mainBodyTextFrame.getMinY() + i * width, minY, width, height));
}
return columns;
}
private Comparator<Map.Entry<Integer, List<Integer>>> comparePercentages(Integer numberOfLines) {
return Comparator.comparingDouble(entry -> calculatePercentage(entry.getValue().size(), numberOfLines));
}
private List<Rectangle2D> getLinesWithMatchingGaps(List<Integer> linesWithMatchingGapIndices, List<List<Rectangle2D>> linesWithGaps) {
return linesWithMatchingGapIndices.stream().map(linesWithGaps::get).flatMap(Collection::stream).toList();
}
private boolean percentageIsAboveThreshold(Map.Entry<Integer, List<Integer>> entry, Integer numberOfLines) {
return calculatePercentage(entry.getValue().size(), numberOfLines) > SPLITTABLE_LINE_PERCENTAGE_THRESHOLD;
}
private double calculatePercentage(Integer numberOfMatchingLines, Integer numberOfLines) {
return ((double) numberOfMatchingLines) / ((double) numberOfLines);
}
private double calculateGapLocation(double pageWidth, int numberOfColumns, int columnIndex) {
return (pageWidth / numberOfColumns) * columnIndex;
}
private Boolean noBlocksIntersectX(List<Rectangle2D> blocksWithGaps, double x) {
return blocksWithGaps.stream().noneMatch(rect -> rect.getMaxX() > x && rect.getMinX() < x);
}
}

View File

@ -0,0 +1,115 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.service;
import java.awt.geom.Rectangle2D;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.TextPositionSequenceComparator;
import lombok.AllArgsConstructor;
import lombok.experimental.UtilityClass;
/**
 * Groups text position sequences into lines and, within each line, into blocks separated by horizontal gaps.
 * Every block is represented by the bounding box of the text it contains.
 */
@UtilityClass
public class LineDetectionService {

    // Multiplied with the average text height; a horizontal distance above the product counts as a gap in a line.
    private static final double X_GAP_FACTOR = 1;


    /**
     * Sorts the given text with {@link TextPositionSequenceComparator} and walks through it once, starting a new
     * line on a vertical jump or a change of text direction, and a new block on a horizontal gap.
     *
     * @param textPositionSequences the text to group
     * @return one list of block bounding boxes per detected line; an empty list if no text is given
     */
    public static List<List<Rectangle2D>> findLinesWithGaps(List<TextPositionSequence> textPositionSequences) {

        if (textPositionSequences.isEmpty()) {
            return Collections.emptyList();
        }

        final double averageHeight = textPositionSequences.stream().mapToDouble(TextPositionSequence::getHeight).average().orElseThrow();
        Context state = Context.init();
        List<TextPositionSequence> ordered = textPositionSequences.stream().sorted(new TextPositionSequenceComparator()).toList();

        TextPositionSequence previous = ordered.get(0);
        state.textPositionsToMerge.add(previous);

        for (int position = 1; position < ordered.size(); position++) {
            TextPositionSequence current = ordered.get(position);
            boolean lineBreak = isNewLine(current, previous, averageHeight) || isSplitByOrientation(current, previous);

            if (!lineBreak && !isXGap(current, previous, averageHeight)) {
                // Still the same block: keep collecting.
                state.textPositionsToMerge.add(current);
            } else {
                // The pending block is finished in both remaining cases.
                addBlockToLine(state);
                if (lineBreak) {
                    startNewLine(current, state);
                } else {
                    startNewBlock(current, state);
                }
            }
            previous = current;
        }

        // Flush the block that was still being collected when the input ended.
        addBlockToLine(state);
        return state.linesWithGaps;
    }


    /** A gap exists when the horizontal distance between the two sequences exceeds the scaled average height. */
    private static boolean isXGap(TextPositionSequence currentTextPosition, TextPositionSequence previousTextPosition, double avgTextPositionHeight) {

        double minimumGapWidth = avgTextPositionHeight * X_GAP_FACTOR;
        return Math.abs(previousTextPosition.getMaxXDirAdj() - currentTextPosition.getMinXDirAdj()) > minimumGapWidth;
    }


    /** Sequences with different text directions never share a line. */
    private static boolean isSplitByOrientation(TextPositionSequence currentTextPosition, TextPositionSequence previousTextPosition) {

        boolean sameDirection = previousTextPosition.getDir().equals(currentTextPosition.getDir());
        return !sameDirection;
    }


    /** A new line starts when the vertical offset between the two sequences exceeds the average text height. */
    private static boolean isNewLine(TextPositionSequence currentTextPosition, TextPositionSequence previousTextPosition, double avgTextPositionHeight) {

        return Math.abs(previousTextPosition.getMinYDirAdj() - currentTextPosition.getMinYDirAdj()) > avgTextPositionHeight;
    }


    /** Begins collecting a fresh block that initially holds only the given sequence. */
    private static void startNewBlock(TextPositionSequence currentTextPosition, Context context) {

        List<TextPositionSequence> freshBlock = new LinkedList<>();
        freshBlock.add(currentTextPosition);
        context.textPositionsToMerge = freshBlock;
    }


    /** Appends the bounding box of the sequences collected so far to the current line. */
    private static void addBlockToLine(Context context) {

        Rectangle2D blockBBox = textPositionBBox(context.textPositionsToMerge);
        context.blocksInLine.add(blockBBox);
    }


    /** Registers a new, empty line as the current one and begins its first block with the given sequence. */
    private static void startNewLine(TextPositionSequence current, Context context) {

        List<Rectangle2D> freshLine = new LinkedList<>();
        context.blocksInLine = freshLine;
        context.linesWithGaps.add(freshLine);
        startNewBlock(current, context);
    }


    private Rectangle2D textPositionBBox(List<TextPositionSequence> textPositionSequences) {

        List<Rectangle2D> rectangles = textPositionSequences.stream().map(TextPositionSequence::getRectangle).toList();
        return RectangleTransformations.rectangleBBox(rectangles);
    }


    /** Mutable working state of a single findLinesWithGaps run. */
    @AllArgsConstructor
    private class Context {

        // all lines found so far, including the one currently being filled
        List<List<Rectangle2D>> linesWithGaps;
        // the line currently being filled; also contained in linesWithGaps
        List<Rectangle2D> blocksInLine;
        // sequences collected for the block that has not been added to the line yet
        List<TextPositionSequence> textPositionsToMerge;


        /** Creates a state that already contains one empty line and an empty pending block. */
        public static Context init() {

            List<Rectangle2D> firstLine = new LinkedList<>();
            List<List<Rectangle2D>> lines = new LinkedList<>();
            lines.add(firstLine);
            return new Context(lines, firstLine, new LinkedList<>());
        }

    }

}

View File

@ -29,6 +29,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.mo
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.utils.FileUtils;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;
@ -129,6 +130,7 @@ public class PdfSegmentationService {
stripper.getRulings(),
stripper.getMinCharWidth(),
stripper.getMaxCharHeight());
// var columns = ColumnDetectionService.detectColumns(stripper.getTextPositionSequences(), RectangleTransformations.toRectangle2D(pdPage.getCropBox()));
ClassificationPage page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
page.setRotation(rotation);

View File

@ -23,7 +23,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.ut
@Service
@SuppressWarnings("all")
@ConditionalOnProperty(prefix = "application", name = "type", havingValue = "RedactManager")
public class RedactManagerBlockificationService implements BlockificationService{
public class RedactManagerBlockificationService implements BlockificationService {
static final float THRESHOLD = 1f;

View File

@ -82,7 +82,6 @@ public class DocumentGraphFactory {
page.getMainBody().add(node);
List<TextPageBlock> textBlocks = new ArrayList<>(textBlocksToMerge);
textBlocks.add(originalTextBlock);
AtomicTextBlock textBlock = context.textBlockFactory.fromContext(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page);
List<Integer> treeId = context.documentTree.createNewChildEntryAndReturnId(parentNode, node);
node.setLeafTextBlock(textBlock);
@ -181,7 +180,7 @@ public class DocumentGraphFactory {
Page page = context.getPage(pageIndex);
Header header = Header.builder().documentTree(context.getDocumentTree()).build();
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlockFromInteger(header, 0, page);
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
header.setTreeId(tocId);
header.setLeafTextBlock(textBlock);

View File

@ -9,6 +9,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image.ClassifiedImage;
@ -80,9 +81,10 @@ public class SectionNodeFactory {
remainingBlocks.removeAll(alreadyMerged);
if (abstractPageBlock instanceof TextPageBlock) {
List<TextPageBlock> textBlocks = findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(abstractPageBlock, remainingBlocks);
alreadyMerged.addAll(textBlocks);
DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, textBlocks);
// List<TextPageBlock> textBlocksToMerge = findTextBlocksWithSameClassificationAndAlignsYAndSameOrientationUntilConvergence((TextPageBlock) abstractPageBlock, remainingBlocks);
List<TextPageBlock> textBlocksToMerge = findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(List.of((TextPageBlock) abstractPageBlock), remainingBlocks);
alreadyMerged.addAll(textBlocksToMerge);
DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, textBlocksToMerge);
} else if (abstractPageBlock instanceof TablePageBlock tablePageBlock) {
List<TablePageBlock> tablesToMerge = TableMergingUtility.findConsecutiveTablesWithSameColCountAndSameHeaders(tablePageBlock, remainingBlocks);
alreadyMerged.addAll(tablesToMerge);
@ -162,15 +164,30 @@ public class SectionNodeFactory {
}
private List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(AbstractPageBlock atc, List<AbstractPageBlock> pageBlocks) {
private List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsYAndSameOrientationUntilConvergence(TextPageBlock originalTextBlocks,
List<AbstractPageBlock> pageBlocks) {
return pageBlocks.stream()
.filter(abstractPageBlock -> !abstractPageBlock.equals(atc))
.filter(abstractPageBlock -> abstractPageBlock.getPage() == atc.getPage())
.filter(abstractPageBlock -> abstractPageBlock.getOrientation().equals(atc.getOrientation()))
.filter(abstractPageBlock -> abstractPageBlock.intersectsY(atc))
.filter(abstractPageBlock -> abstractPageBlock instanceof TextPageBlock)
.map(abstractPageBlock -> (TextPageBlock) abstractPageBlock)
int previousCount = 1;
List<TextPageBlock> alignedBlocks = findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(List.of(originalTextBlocks), pageBlocks);
while (previousCount < alignedBlocks.size()) {
alignedBlocks = findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(alignedBlocks, pageBlocks);
previousCount = alignedBlocks.size();
}
return alignedBlocks;
}
/**
 * Collects the text blocks from {@code pageBlocks} that belong together with {@code textBlocksToMerge}.
 * A page block is selected when it is not already part of {@code textBlocksToMerge}, lies on the same page and
 * has the same orientation as every block to merge, overlaps at least one of them on the y-axis and is a
 * {@link TextPageBlock}. Despite the method name, no classification is compared here.
 *
 * @param textBlocksToMerge the blocks that serve as reference
 * @param pageBlocks        the candidates to search
 * @return the selected candidates in their original order, followed by all of {@code textBlocksToMerge}
 */
private static List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsYAndSameOrientation(List<TextPageBlock> textBlocksToMerge, List<AbstractPageBlock> pageBlocks) {

    Stream<TextPageBlock> alignedCandidates = pageBlocks.stream()
            .filter(candidate -> !textBlocksToMerge.contains(candidate)
                    && textBlocksToMerge.stream().allMatch(reference -> candidate.getPage() == reference.getPage())
                    && textBlocksToMerge.stream().allMatch(reference -> candidate.getOrientation().equals(reference.getOrientation()))
                    && textBlocksToMerge.stream().anyMatch(candidate::intersectsY)
                    // an additional x-overlap check via intersectsX is currently disabled
                    && candidate instanceof TextPageBlock)
            .map(candidate -> (TextPageBlock) candidate);

    return Stream.concat(alignedCandidates, textBlocksToMerge.stream()).toList();
}

View File

@ -43,7 +43,7 @@ public class TextBlockFactory {
}
public AtomicTextBlock emptyTextBlock(SemanticNode parent, Integer numberOnPage, Page page) {
public AtomicTextBlock emptyTextBlockFromInteger(SemanticNode parent, Integer numberOnPage, Page page) {
long idx = textBlockIdx;
textBlockIdx++;

View File

@ -104,6 +104,12 @@ public class RectangleTransformations {
}
/**
 * Converts a PDFBox rectangle into an AWT rectangle with the same lower-left corner, width and height.
 *
 * @param cropBox the rectangle to convert
 * @return a new {@link Rectangle2D.Double} describing the same area
 */
public static Rectangle2D toRectangle2D(PDRectangle cropBox) {

    double x = cropBox.getLowerLeftX();
    double y = cropBox.getLowerLeftY();
    double width = cropBox.getWidth();
    double height = cropBox.getHeight();
    return new Rectangle2D.Double(x, y, width, height);
}
private static class Rectangle2DBBoxCollector implements Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> {
@Override
@ -133,7 +139,7 @@ public class RectangleTransformations {
@Override
public Function<BBox, Rectangle2D> finisher() {
return bb -> new Rectangle2D.Double(bb.lowerLeftX, bb.lowerLeftY, bb.upperRightX - bb.lowerLeftX, bb.upperRightY - bb.lowerLeftY);
return BBox::toRectangle2D;
}
@ -154,6 +160,15 @@ public class RectangleTransformations {
Double upperRightY;
/**
 * Turns the collected corner coordinates into a rectangle.
 *
 * @return the rectangle spanned by the lower-left and upper-right corner, or an empty rectangle at the origin
 *         if any of the four coordinates has not been set
 */
public Rectangle2D toRectangle2D() {

    boolean allCornersKnown = lowerLeftX != null && lowerLeftY != null && upperRightX != null && upperRightY != null;
    if (allCornersKnown) {
        double width = upperRightX - lowerLeftX;
        double height = upperRightY - lowerLeftY;
        return new Rectangle2D.Double(lowerLeftX, lowerLeftY, width, height);
    }
    return new Rectangle2D.Double(0, 0, 0, 0);
}
public void addRectangle(Rectangle2D rectangle2D) {
double lowerLeftX = Math.min(rectangle2D.getMinX(), rectangle2D.getMaxX());

View File

@ -0,0 +1,58 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.service;
import java.awt.geom.Rectangle2D;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.PdfVisualisationUtility;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import lombok.SneakyThrows;
/**
 * Visual smoke test for {@link ColumnDetectionService}: runs the detection on every page of a sample PDF, draws
 * the detected columns into the document and writes the result to the temp directory for manual inspection.
 * The test makes no assertions; it only fails if an exception is thrown.
 */
class ColumnDetectionServiceTest {

    @Test
    @SneakyThrows
    public void testColumnDetection() {

        String filename = "files/Documine/Flora/ProblemDocs/S37Struktur.pdf";
        // Name the output after the file itself (last path segment) instead of a fixed path index,
        // which for this resource pointed at the directory "Flora".
        var tmpFileName = "/tmp/" + filename.substring(filename.lastIndexOf('/') + 1) + "_COLUMNS.pdf";

        // Both resources are closed automatically, also when detection or saving throws.
        try (InputStream inputStream = new ClassPathResource(filename).getInputStream(); PDDocument pdDocument = PDDocument.load(inputStream)) {

            System.out.println("start column detection");
            long start = System.currentTimeMillis();
            for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
                PDFLinesTextStripper stripper = new PDFLinesTextStripper();
                // the stripper works with 1-based page numbers, PDDocument#getPage with 0-based indices
                PDPage pdPage = pdDocument.getPage(pageNumber - 1);
                stripper.setPageNumber(pageNumber);
                stripper.setStartPage(pageNumber);
                stripper.setEndPage(pageNumber);
                stripper.setPdpage(pdPage);
                stripper.getText(pdDocument);

                List<Rectangle2D> columns = ColumnDetectionService.detectColumns(stripper.getTextPositionSequences(), RectangleTransformations.toRectangle2D(pdPage.getCropBox()));

                System.out.printf("found %d columns on page %d%n", columns.size(), pageNumber);
                PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, columns, PdfVisualisationUtility.Options.builder().stroke(true).build());
            }
            System.out.printf("finished col detection, took %d ms", System.currentTimeMillis() - start);

            try (var out = new FileOutputStream(tmpFileName)) {
                pdDocument.save(out);
            }
        }
    }

}

View File

@ -0,0 +1,79 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.service;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.PdfVisualisationUtility;
import lombok.SneakyThrows;
/**
 * Visual smoke test for {@link LineDetectionService}: runs the line detection on every page of a sample PDF,
 * draws each block and the index of its line into the document and writes the result to the temp directory for
 * manual inspection. The test is disabled and makes no assertions.
 */
class LineDetectionServiceTest {

    @Test
    @Disabled
    @SneakyThrows
    public void testLineDetection() {

        String filename = "files/BDR/Plenarprotokoll 1 (keine Druchsache!) (1).pdf";
        var tmpFileName = "/tmp/" + filename.split("/")[2] + "_LINES.pdf";

        // Both resources are closed automatically, also when detection or saving throws.
        try (InputStream inputStream = new ClassPathResource(filename).getInputStream(); PDDocument pdDocument = PDDocument.load(inputStream)) {

            // This test exercises line detection, so the progress message says so.
            System.out.println("start line detection");
            long start = System.currentTimeMillis();
            for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
                PDFLinesTextStripper stripper = new PDFLinesTextStripper();
                // the stripper works with 1-based page numbers, PDDocument#getPage with 0-based indices
                PDPage pdPage = pdDocument.getPage(pageNumber - 1);
                stripper.setPageNumber(pageNumber);
                stripper.setStartPage(pageNumber);
                stripper.setEndPage(pageNumber);
                stripper.setPdpage(pdPage);
                stripper.getText(pdDocument);

                List<List<Rectangle2D>> linesWithGaps = LineDetectionService.findLinesWithGaps(stripper.getTextPositionSequences());

                System.out.printf("found %d lines on page %d%n", linesWithGaps.size(), pageNumber);
                int lineIndex = 0;
                for (List<Rectangle2D> line : linesWithGaps) {
                    PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, line, PdfVisualisationUtility.Options.builder().stroke(true).build());
                    // Put the line index to the left of the first block; wider numbers are shifted further left.
                    Rectangle2D firstBlock = line.get(0);
                    PdfVisualisationUtility.drawText(String.format("%d", lineIndex),
                            pdDocument,
                            new Point2D.Double(firstBlock.getX() - (5 + (5 * countNumberOfDigits(lineIndex))), firstBlock.getY() + 2),
                            pageNumber,
                            PdfVisualisationUtility.Options.builder().stroke(true).build());
                    lineIndex++;
                }
            }
            System.out.printf("finished line detection, took %d ms", System.currentTimeMillis() - start);

            try (var out = new FileOutputStream(tmpFileName)) {
                pdDocument.save(out);
            }
        }
    }


    /** Counts the decimal digits of {@code num}; zero has one digit and a sign is not counted. */
    private int countNumberOfDigits(int num) {

        if (num == 0) {
            return 1;
        }
        int count = 0;
        while (num != 0) {
            num /= 10;
            count++;
        }
        return count;
    }

}