Compare commits

...

2 Commits

Author SHA1 Message Date
maverickstuder
7d6caabbfb tests 2024-02-14 14:54:10 +01:00
maverickstuder
ec035aca2f tests 2024-02-13 18:04:22 +01:00
6 changed files with 204 additions and 74 deletions

View File

@ -19,4 +19,5 @@ public class PageContents {
Rectangle2D cropBox; Rectangle2D cropBox;
Rectangle2D mediaBox; Rectangle2D mediaBox;
List<Ruling> rulings; List<Ruling> rulings;
List<Rectangle2D> positions;
} }

View File

@ -39,6 +39,8 @@ public class PageContentExtractor {
stripper.setEndPage(pageNumber); stripper.setEndPage(pageNumber);
stripper.setPdpage(pdPage); stripper.setPdpage(pdPage);
stripper.getText(pdDocument); stripper.getText(pdDocument);
var positions = stripper.getExactPositions();
Map<Float, List<TextPositionSequence>> sortedTextPositionSequencesPerDir = stripper.getTextPositionSequences() Map<Float, List<TextPositionSequence>> sortedTextPositionSequencesPerDir = stripper.getTextPositionSequences()
.stream() .stream()
@ -49,7 +51,7 @@ public class PageContentExtractor {
textPositionSequencesPerPage.add(new PageContents(sortedTextPositionSequences, textPositionSequencesPerPage.add(new PageContents(sortedTextPositionSequences,
RectangleTransformations.toRectangle2D(pdPage.getCropBox()), RectangleTransformations.toRectangle2D(pdPage.getCropBox()),
RectangleTransformations.toRectangle2D(pdPage.getMediaBox()), RectangleTransformations.toRectangle2D(pdPage.getMediaBox()),
stripper.getRulings())); stripper.getRulings(), positions));
} }
} }

View File

@ -1,6 +1,5 @@
package com.knecon.fforesight.service.layoutparser.processor.services.blockification; package com.knecon.fforesight.service.layoutparser.processor.services.blockification;
// TODO: figure out, why this fails the build // TODO: figure out, why this fails the build
// import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING; // import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING;
@ -11,6 +10,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import com.knecon.fforesight.service.layoutparser.processor.utils.RulingTextDirAdjustUtil; import com.knecon.fforesight.service.layoutparser.processor.utils.RulingTextDirAdjustUtil;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.*; import java.util.*;
@ -83,13 +83,13 @@ public class TaasBlockificationService {
continue; continue;
} }
Matcher listIdentifierPattern = listIdentifier.matcher(currentTextBlock.getText()); Matcher listIdentifierPattern = listIdentifier.matcher(currentTextBlock.getText());
boolean isListIdentifier = listIdentifierPattern.find(); boolean isListIdentifier = listIdentifierPattern.find();
boolean yGap = Math.abs(currentTextBlock.getPdfMaxY() - previousTextBlock.getPdfMinY()) < previousTextBlock.getMostPopularWordHeight() * Y_GAP_SPLIT_HEIGHT_MODIFIER; boolean yGap = Math.abs(currentTextBlock.getPdfMaxY() - previousTextBlock.getPdfMinY()) < previousTextBlock.getMostPopularWordHeight() * Y_GAP_SPLIT_HEIGHT_MODIFIER;
boolean sameFont = previousTextBlock.getMostPopularWordFont().equals(currentTextBlock.getMostPopularWordFont()) && previousTextBlock.getMostPopularWordFontSize() == currentTextBlock.getMostPopularWordFontSize(); boolean sameFont = previousTextBlock.getMostPopularWordFont()
.equals(currentTextBlock.getMostPopularWordFont()) && previousTextBlock.getMostPopularWordFontSize() == currentTextBlock.getMostPopularWordFontSize();
// boolean yGap = previousTextBlock != null && currentTextBlock.getMinYDirAdj() - maxY > Math.min(word.getHeight(), prev.getHeight()) * Y_GAP_SPLIT_HEIGHT_MODIFIER; // boolean yGap = previousTextBlock != null && currentTextBlock.getMinYDirAdj() - maxY > Math.min(word.getHeight(), prev.getHeight()) * Y_GAP_SPLIT_HEIGHT_MODIFIER;
boolean alignsXRight = Math.abs(currentTextBlock.getPdfMaxX() - previousTextBlock.getPdfMaxX()) < X_ALIGNMENT_THRESHOLD; boolean alignsXRight = Math.abs(currentTextBlock.getPdfMaxX() - previousTextBlock.getPdfMaxX()) < X_ALIGNMENT_THRESHOLD;
@ -119,8 +119,10 @@ public class TaasBlockificationService {
} }
alreadyMerged.add(textPageBlock); alreadyMerged.add(textPageBlock);
textBlocksToMerge.add(Stream.concat(Stream.of(textPageBlock), textBlocksToMerge.add(Stream.concat(Stream.of(textPageBlock),
textPageBlocks.stream().filter(textPageBlock2 -> textPageBlock.almostIntersects(textPageBlock2, INTERSECTS_Y_THRESHOLD, 0) && !alreadyMerged.contains(textPageBlock2)).peek(alreadyMerged::add)) textPageBlocks.stream()
.toList()); .filter(textPageBlock2 -> textPageBlock.almostIntersects(textPageBlock2, INTERSECTS_Y_THRESHOLD, 0) && !alreadyMerged.contains(textPageBlock2))
.peek(alreadyMerged::add))//
.toList());
} }
return textBlocksToMerge.stream().map(TextPageBlock::merge).toList(); return textBlocksToMerge.stream().map(TextPageBlock::merge).toList();
} }
@ -163,8 +165,7 @@ public class TaasBlockificationService {
while (itty.hasNext()) { while (itty.hasNext()) {
TextPageBlock block = (TextPageBlock) itty.next(); TextPageBlock block = (TextPageBlock) itty.next();
if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold( if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(),
block.getMaxY(),
previous.getMaxY()) || previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation() previous.getMaxY()) || previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation()
.equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY())) { .equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY())) {
previous.add(block); previous.add(block);
@ -189,7 +190,6 @@ public class TaasBlockificationService {
TextPositionSequence prev = null; TextPositionSequence prev = null;
// TODO: make static final constant // TODO: make static final constant
boolean wasSplitted = false; boolean wasSplitted = false;
Float splitX1 = null; Float splitX1 = null;
for (TextPositionSequence word : textPositions) { for (TextPositionSequence word : textPositions) {

View File

@ -16,13 +16,20 @@
*/ */
package com.knecon.fforesight.service.layoutparser.processor.services.parsing; package com.knecon.fforesight.service.layoutparser.processor.services.parsing;
import java.awt.geom.AffineTransform;
import java.awt.geom.Area;
import java.awt.geom.Rectangle2D;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.WeakHashMap; import java.util.WeakHashMap;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.FontBoxFont;
import org.apache.fontbox.ttf.GlyphData;
import org.apache.fontbox.ttf.TrueTypeFont; import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.fontbox.util.BoundingBox; import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.contentstream.PDFStreamEngine; import org.apache.pdfbox.contentstream.PDFStreamEngine;
@ -52,12 +59,14 @@ import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDCIDFont; import org.apache.pdfbox.pdmodel.font.PDCIDFont;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType0;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; import org.apache.pdfbox.pdmodel.font.PDCIDFontType2;
import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
import org.apache.pdfbox.pdmodel.font.PDSimpleFont; import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.apache.pdfbox.pdmodel.font.encoding.GlyphList; import org.apache.pdfbox.pdmodel.font.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState; import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
@ -65,6 +74,8 @@ import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector; import org.apache.pdfbox.util.Vector;
import lombok.Getter;
/** /**
* LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper. * LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper.
* <p> * <p>
@ -86,6 +97,9 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
private final GlyphList glyphList; private final GlyphList glyphList;
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>(); private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
@Getter
private List<Rectangle2D> exactPositions = new LinkedList<>();
/** /**
* Constructor. * Constructor.
@ -163,6 +177,51 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f; float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f;
Matrix textMatrix = getTextMatrix(); Matrix textMatrix = getTextMatrix();
TrueTypeFont ttf = null;
if (font instanceof PDTrueTypeFont) {
ttf = ((PDTrueTypeFont) font).getTrueTypeFont();
} else if (font instanceof PDType0Font) {
PDCIDFont cidFont = ((PDType0Font) font).getDescendantFont();
if (cidFont instanceof PDCIDFontType2) {
ttf = ((PDCIDFontType2) cidFont).getTrueTypeFont();
}
} else if (font instanceof PDType1Font) {
FontBoxFont fontBoxFont = ((PDType1Font) font).getFontBoxFont();
if (fontBoxFont instanceof TrueTypeFont) {
ttf = (TrueTypeFont) fontBoxFont;
} else {
System.out.println("What do?");
}
}
if (ttf != null) {
Integer glyphId = null;
if (font instanceof PDTrueTypeFont) {
glyphId = ((PDTrueTypeFont) font).codeToGID(code);
} else if (font instanceof PDType0Font) {
glyphId = ((PDType0Font) font).codeToGID(code);
} else if (font instanceof PDType1Font) {
FontBoxFont fontBoxFont = ((PDType1Font) font).getFontBoxFont();
if (fontBoxFont instanceof TrueTypeFont) {
glyphId = ((TrueTypeFont) fontBoxFont).getUnicodeCmapLookup().getGlyphId(code);
}
}
if (glyphId != null) {
GlyphData glyph = ttf.getGlyph().getGlyph(glyphId);
if (glyph != null) {
BoundingBox boundingBox = glyph.getBoundingBox();
Rectangle2D rect = new Rectangle2D.Double(boundingBox.getLowerLeftX(), boundingBox.getLowerLeftY(), boundingBox.getWidth(), boundingBox.getHeight());
Area area = new Area(rect);
AffineTransform affineTransform = textRenderingMatrix.createAffineTransform();
float factor = 1f / ttf.getUnitsPerEm();
affineTransform.scale(factor, factor);
exactPositions.add(area.createTransformedArea(affineTransform).getBounds2D());
}
}
}
float displacementX = displacement.getX(); float displacementX = displacement.getX();
// the sorting algorithm is based on the width of the character. As the displacement // the sorting algorithm is based on the width of the character. As the displacement
// for vertical characters doesn't provide any suitable value for it, we have to // for vertical characters doesn't provide any suitable value for it, we have to
@ -170,15 +229,6 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
if (font.isVertical()) { if (font.isVertical()) {
displacementX = font.getWidth(code) / 1000; displacementX = font.getWidth(code) / 1000;
// there may be an additional scaling factor for true type fonts // there may be an additional scaling factor for true type fonts
TrueTypeFont ttf = null;
if (font instanceof PDTrueTypeFont) {
ttf = ((PDTrueTypeFont) font).getTrueTypeFont();
} else if (font instanceof PDType0Font) {
PDCIDFont cidFont = ((PDType0Font) font).getDescendantFont();
if (cidFont instanceof PDCIDFontType2) {
ttf = ((PDCIDFontType2) cidFont).getTrueTypeFont();
}
}
if (ttf != null && ttf.getUnitsPerEm() != 1000) { if (ttf != null && ttf.getUnitsPerEm() != 1000) {
displacementX *= 1000f / ttf.getUnitsPerEm(); displacementX *= 1000f / ttf.getUnitsPerEm();
} }
@ -201,11 +251,11 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
// (modified) text rendering matrix // (modified) text rendering matrix
Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space
float nextX = nextTextRenderingMatrix.getTranslateX(); float endX = nextTextRenderingMatrix.getTranslateX();
float nextY = nextTextRenderingMatrix.getTranslateY(); float endY = nextTextRenderingMatrix.getTranslateY();
// (modified) width and height calculations // (modified) width and height calculations
float dxDisplay = nextX - textRenderingMatrix.getTranslateX(); float dxDisplay = endX - textRenderingMatrix.getTranslateX();
Float fontHeight = fontHeightMap.get(font.getCOSObject()); Float fontHeight = fontHeightMap.get(font.getCOSObject());
if (fontHeight == null) { if (fontHeight == null) {
fontHeight = computeFontHeight(font); fontHeight = computeFontHeight(font);
@ -271,56 +321,56 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
translatedTextRenderingMatrix = textRenderingMatrix; translatedTextRenderingMatrix = textRenderingMatrix;
} else { } else {
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix); translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
nextX -= pageSize.getLowerLeftX(); endX -= pageSize.getLowerLeftX();
nextY -= pageSize.getLowerLeftY(); endY -= pageSize.getLowerLeftY();
} }
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf // This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
if (unicodeMapping.length() == 2) { if (unicodeMapping.length() == 2) {
processTextPosition(new TextPosition(pageRotation, processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(), pageSize.getWidth(),
pageSize.getHeight(), pageSize.getHeight(),
translatedTextRenderingMatrix, translatedTextRenderingMatrix,
nextX, endX,
nextY, endY,
Math.abs(dyDisplay), Math.abs(dyDisplay),
dxDisplay, dxDisplay,
Math.abs(spaceWidthDisplay), Math.abs(spaceWidthDisplay),
Character.toString(unicodeMapping.charAt(0)), Character.toString(unicodeMapping.charAt(0)),
new int[]{code}, new int[]{code},
font, font,
fontSize, fontSize,
(int) (fontSize * textMatrix.getScalingFactorX()))); (int) (fontSize * textMatrix.getScalingFactorX())));
processTextPosition(new TextPosition(pageRotation, processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(), pageSize.getWidth(),
pageSize.getHeight(), pageSize.getHeight(),
translatedTextRenderingMatrix, translatedTextRenderingMatrix,
nextX, endX,
nextY, endY,
Math.abs(dyDisplay), Math.abs(dyDisplay),
dxDisplay, dxDisplay,
Math.abs(spaceWidthDisplay), Math.abs(spaceWidthDisplay),
Character.toString(unicodeMapping.charAt(1)), Character.toString(unicodeMapping.charAt(1)),
new int[]{code}, new int[]{code},
font, font,
fontSize, fontSize,
(int) (fontSize * textMatrix.getScalingFactorX()))); (int) (fontSize * textMatrix.getScalingFactorX())));
} else { } else {
processTextPosition(new TextPosition(pageRotation, processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(), pageSize.getWidth(),
pageSize.getHeight(), pageSize.getHeight(),
translatedTextRenderingMatrix, translatedTextRenderingMatrix,
nextX, endX,
nextY, endY,
Math.abs(dyDisplay), Math.abs(dyDisplay),
dxDisplay, dxDisplay,
Math.abs(spaceWidthDisplay), Math.abs(spaceWidthDisplay),
unicodeMapping, unicodeMapping,
new int[]{code}, new int[]{code},
font, font,
fontSize, fontSize,
(int) (fontSize * textMatrix.getScalingFactorX()))); (int) (fontSize * textMatrix.getScalingFactorX())));
} }
} }

View File

@ -1,12 +1,22 @@
package com.knecon.fforesight.service.layoutparser.server.services; package com.knecon.fforesight.service.layoutparser.server.services;
import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.List; import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import com.knecon.fforesight.service.layoutparser.processor.model.PageContents; import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor; import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
@ -20,21 +30,88 @@ class PageContentExtractorTest {
@SneakyThrows @SneakyThrows
public void testTextPositionSequenceExtraction() { public void testTextPositionSequenceExtraction() {
String fileName = "files/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf"; //String fileName = "files/CLEAN-II-4.2.2.01_家畜残留分析法.pdf";
//String fileName = "files/BASF/2013-1110704.pdf";
//String fileName = "files/ImportRedactionTestFile_highlighted.pdf";
String fileName = "files/HelloWorldHelvetica.pdf";
var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TEXT_POSITION_SEQUENCES.pdf").toString(); var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TEXT_POSITION_SEQUENCES.pdf").toString();
List<PageContents> textPositionPerPage = PageContentExtractor.getSortedPageContents(fileName); List<PageContents> textPositionPerPage = PageContentExtractor.getSortedPageContents(fileName);
PdfDraw.drawRectanglesPerPageNumberedByLine(fileName, PdfDraw.drawRectanglesPerPage(fileName,
textPositionPerPage.stream() textPositionPerPage.stream()
.map(t -> t.getSortedTextPositionSequences() //.map(t -> t.getSortedTextPositionSequences()
.stream() .map(t -> t.getPositions())
.map(TextPositionSequence::getRectangle) // .stream().flatMap(sequence -> sequence.getTextPositions().stream().map(textPosition -> mapRedTextPositionToInitialUserSpace(textPosition, sequence)))
.map(RectangleTransformations::toRectangle2D)
//.map(textPositionSequence -> (Rectangle2D) new Rectangle2D.Double(textPositionSequence.getMaxXDirAdj(), textPositionSequence.getMaxYDirAdj(), textPositionSequence.getWidth(), textPositionSequence.getHeight())) //.map(textPositionSequence -> (Rectangle2D) new Rectangle2D.Double(textPositionSequence.getMaxXDirAdj(), textPositionSequence.getMaxYDirAdj(), textPositionSequence.getWidth(), textPositionSequence.getHeight()))
.map(List::of) //.map(List::of)
.toList()) //.toList())
.toList(), tmpFileName); .toList(), tmpFileName);
} }
public final int HEIGHT_PADDING = 2;
/**
 * Builds a padded rectangle from the direction-adjusted coordinates of a single text position and
 * maps it through a rotate / translate / vertical-flip transform chosen by the sequence's text direction.
 * NOTE(review): per the method name the result is presumably in the PDF's initial user space — confirm.
 *
 * @param textPosition the glyph position supplying the direction-adjusted x, y and width
 * @param sequence     the owning sequence supplying text height, text direction and page dimensions
 * @return the axis-aligned bounds of the transformed rectangle
 */
private Rectangle2D mapRedTextPositionToInitialUserSpace(RedTextPosition textPosition, TextPositionSequence sequence) {

    // HEIGHT_PADDING is applied once to the offset above the y coordinate and once more to the box height.
    float paddedHeight = sequence.getTextHeight() + HEIGHT_PADDING;
    Rectangle2D dirAdjustedBox = new Rectangle2D.Double(textPosition.getXDirAdj(),
                                                        textPosition.getYDirAdj() - paddedHeight,
                                                        textPosition.getWidthDirAdj(),
                                                        paddedHeight + HEIGHT_PADDING);

    // Only the rotation pivot and the vertical offset depend on the text direction.
    var direction = sequence.getDir();
    double pivotX;
    double pivotY;
    double offsetY;
    if (direction == TextDirection.ZERO || direction == TextDirection.HALF_CIRCLE) {
        pivotX = sequence.getPageWidth() / 2f;
        pivotY = sequence.getPageHeight() / 2f;
        offsetY = sequence.getPageHeight();
    } else if (direction == TextDirection.QUARTER_CIRCLE) {
        pivotX = sequence.getPageWidth() / 2f;
        pivotY = sequence.getPageWidth() / 2f;
        offsetY = sequence.getPageWidth();
    } else {
        pivotX = sequence.getPageHeight() / 2f;
        pivotY = sequence.getPageHeight() / 2f;
        offsetY = sequence.getPageWidth();
    }

    AffineTransform toUserSpace = new AffineTransform();
    toUserSpace.rotate(direction.getRadians(), pivotX, pivotY);
    toUserSpace.translate(0f, offsetY);
    // Mirror the y axis.
    toUserSpace.scale(1., -1.);
    return toUserSpace.createTransformedShape(dirAdjustedBox).getBounds2D();
}
/**
 * Writes a one-page PDF containing the line "Hello World in Helvetica!" (Helvetica, 12 pt,
 * text offset 50/700) to /tmp/MyPDF.pdf.
 * <p>
 * The document and the content stream are managed by try-with-resources so that both are
 * closed even when writing or saving fails.
 */
@Test
@SneakyThrows
public void generatePDF() {

    // Create a new PDF document; closed automatically at the end of the block
    try (PDDocument document = new PDDocument()) {
        // Create a blank page
        PDPage page = new PDPage();
        document.addPage(page);

        // Load the Helvetica font
        PDFont font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);

        // The content stream must be closed before the document is saved, hence the inner scope
        try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
            contentStream.beginText();
            // Set font and font size
            contentStream.setFont(font, 12);
            // Set text position
            contentStream.newLineAtOffset(50, 700);
            // Write the text
            contentStream.showText("Hello World in Helvetica!");
            // Finish writing text
            contentStream.endText();
        }

        // Save the PDF
        document.save("/tmp/MyPDF.pdf");
    }

    System.out.println("PDF created successfully!");
}
} }

View File

@ -64,11 +64,11 @@ public class PdfDraw {
var rectanglesInLine = rectanglesOnPage.get(lineNumber); var rectanglesInLine = rectanglesOnPage.get(lineNumber);
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, rectanglesInLine, PdfVisualisationUtility.Options.builder().stroke(true).build()); PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, rectanglesInLine, PdfVisualisationUtility.Options.builder().stroke(true).build());
double y = Math.min(rectanglesInLine.get(0).getMinY(), rectanglesInLine.get(0).getMaxY()); double y = Math.min(rectanglesInLine.get(0).getMinY(), rectanglesInLine.get(0).getMaxY());
PdfVisualisationUtility.drawText(String.format("%d", lineNumber), /**PdfVisualisationUtility.drawText(String.format("%d", lineNumber),
pdDocument, pdDocument,
new Point2D.Double(rectanglesInLine.get(0).getX() - (5 + (5 * countNumberOfDigits(lineNumber))), y + 2), new Point2D.Double(rectanglesInLine.get(0).getX() - (5 + (5 * countNumberOfDigits(lineNumber))), y + 2),
pageNumber, pageNumber,
PdfVisualisationUtility.Options.builder().stroke(true).build()); PdfVisualisationUtility.Options.builder().stroke(true).build());**/
} }
} }
pdDocument.save(out); pdDocument.save(out);
@ -252,12 +252,12 @@ public class PdfDraw {
rectangle2D = RectangleTransformations.pad(rectangle2D, 10, 10); rectangle2D = RectangleTransformations.pad(rectangle2D, 10, 10);
} }
drawRectangle2DList(document, page.getNumber(), List.of(rectangle2D), options); drawRectangle2DList(document, page.getNumber(), List.of(rectangle2D), options);
drawText(buildString(entry), /**drawText(buildString(entry),
document, document,
new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2), new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2),
page.getNumber(), page.getNumber(),
options, options,
entry.getType() == NodeType.TABLE_CELL); entry.getType() == NodeType.TABLE_CELL);**/
} }
} }