Merge branch 'RED-8800' into 'main'
RED-8800: adjust coordinates in BE to ignore cropbox. See merge request fforesight/layout-parser!179
This commit is contained in:
commit
ea18d3d307
@ -82,6 +82,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
|
||||
private int pageRotation;
|
||||
private PDRectangle pageSize;
|
||||
private Matrix translateMatrix;
|
||||
private final GlyphList glyphList;
|
||||
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
|
||||
|
||||
@ -133,6 +134,12 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
this.pageRotation = page.getRotation();
|
||||
this.pageSize = page.getCropBox();
|
||||
|
||||
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) {
|
||||
translateMatrix = null;
|
||||
} else {
|
||||
// translation matrix for cropbox
|
||||
translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
|
||||
}
|
||||
super.processPage(page);
|
||||
}
|
||||
|
||||
@ -257,13 +264,22 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// adjust for cropbox if needed
|
||||
Matrix translatedTextRenderingMatrix;
|
||||
if (translateMatrix == null) {
|
||||
translatedTextRenderingMatrix = textRenderingMatrix;
|
||||
} else {
|
||||
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
|
||||
nextX -= pageSize.getLowerLeftX();
|
||||
nextY -= pageSize.getLowerLeftY();
|
||||
}
|
||||
|
||||
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
|
||||
if (unicodeMapping.length() == 2) {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
@ -277,7 +293,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
@ -293,7 +309,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
|
||||
@ -30,11 +30,11 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
|
||||
@Autowired
|
||||
private LayoutParsingPipeline layoutParsingPipeline;
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
@Disabled
|
||||
public void testLayoutParserEndToEnd() {
|
||||
|
||||
String filePath = "files/Minimal Examples/RotateTextWithRulingsTestFile.pdf";
|
||||
String filePath = "/tmp/OCR_TEST/10.SYN524464 FS (A16148C) - Absorção cutânea.pdf/document.pdf";
|
||||
|
||||
runForFile(filePath);
|
||||
}
|
||||
|
||||
@ -53,7 +53,7 @@ public class VisualizationWriter {
|
||||
begin(page);
|
||||
|
||||
AffineTransform textDeRotationMatrix = getTextDeRotationTransform(page);
|
||||
AffineTransform pageTransformation = getTextDeRotationTransform(page);
|
||||
AffineTransform pageTransformation = getCropboxAdjustment(page);
|
||||
|
||||
for (LayerGroup layerGroup : layerGroups) {
|
||||
|
||||
@ -68,7 +68,7 @@ public class VisualizationWriter {
|
||||
Element escape = builder.createGroupBegin();
|
||||
writer.writeElement(escape);
|
||||
|
||||
writeVisualizations(pageNumber, layerGroup, textDeRotationMatrix);
|
||||
writeVisualizations(pageNumber, layerGroup, textDeRotationMatrix, pageTransformation);
|
||||
|
||||
Element escapeEnd = builder.createGroupEnd();
|
||||
writer.writeElement(escapeEnd);
|
||||
@ -87,7 +87,14 @@ public class VisualizationWriter {
|
||||
}
|
||||
|
||||
|
||||
private void writeVisualizations(int pageNumber, LayerGroup layerGroup, AffineTransform textDeRotationMatrix) throws PDFNetException {
|
||||
@SneakyThrows
|
||||
private AffineTransform getCropboxAdjustment(Page page) {
|
||||
|
||||
return new AffineTransform(1, 0, 0, 1, page.getCropBox().getX1(), page.getCropBox().getY1());
|
||||
}
|
||||
|
||||
|
||||
private void writeVisualizations(int pageNumber, LayerGroup layerGroup, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) throws PDFNetException {
|
||||
|
||||
for (Visualizations visualization : layerGroup.getVisualizations()) {
|
||||
|
||||
@ -108,8 +115,7 @@ public class VisualizationWriter {
|
||||
Element escape = builder.createGroupBegin();
|
||||
writer.writeElement(escape);
|
||||
|
||||
|
||||
writeVisualization(visualizationsOnPage, textDeRotationMatrix);
|
||||
writeVisualization(visualizationsOnPage, textDeRotationMatrix, pageTransformation);
|
||||
|
||||
Element escapeEnd = builder.createGroupEnd();
|
||||
writer.writeElement(escapeEnd);
|
||||
@ -139,7 +145,7 @@ public class VisualizationWriter {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void writeVisualization(VisualizationsOnPage visualizationsOnPage, AffineTransform textDeRotationMatrix) {
|
||||
private void writeVisualization(VisualizationsOnPage visualizationsOnPage, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) {
|
||||
|
||||
if (visualizationsOnPage.isMakePathsInvisible()) {
|
||||
Element rect = builder.createRect(0, 0, 0, 0);
|
||||
@ -149,28 +155,28 @@ public class VisualizationWriter {
|
||||
|
||||
for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) {
|
||||
|
||||
drawColoredLine(coloredLine);
|
||||
drawColoredLine(coloredLine, pageTransformation);
|
||||
}
|
||||
|
||||
for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) {
|
||||
|
||||
drawColoredRectangle(coloredRectangle);
|
||||
drawColoredRectangle(coloredRectangle, pageTransformation);
|
||||
}
|
||||
|
||||
for (FilledRectangle filledRectangle : visualizationsOnPage.getFilledRectangles()) {
|
||||
|
||||
drawFilledRectangle(filledRectangle);
|
||||
drawFilledRectangle(filledRectangle, pageTransformation);
|
||||
}
|
||||
|
||||
for (PlacedText placedText : visualizationsOnPage.getPlacedTexts()) {
|
||||
|
||||
writePlacedText(textDeRotationMatrix, placedText);
|
||||
writePlacedText(textDeRotationMatrix, placedText, pageTransformation);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void writePlacedText(AffineTransform textDeRotationMatrix, PlacedText placedText) throws PDFNetException {
|
||||
private void writePlacedText(AffineTransform textDeRotationMatrix, PlacedText placedText, AffineTransform pageTransformation) throws PDFNetException {
|
||||
|
||||
float[] rgbComponents = placedText.color().getRGBColorComponents(null);
|
||||
Font font = fontMap.get(placedText.font());
|
||||
@ -180,7 +186,7 @@ public class VisualizationWriter {
|
||||
text.getGState().setFillColor(color);
|
||||
}
|
||||
|
||||
try (Matrix2D textMatrix = getTextMatrix(placedText, textDeRotationMatrix)) {
|
||||
try (Matrix2D textMatrix = getTextMatrix(placedText, textDeRotationMatrix, pageTransformation)) {
|
||||
text.setTextMatrix(textMatrix);
|
||||
}
|
||||
|
||||
@ -217,11 +223,13 @@ public class VisualizationWriter {
|
||||
}
|
||||
|
||||
|
||||
private void drawFilledRectangle(FilledRectangle filledRectangle) throws PDFNetException {
|
||||
private void drawFilledRectangle(FilledRectangle filledRectangle, AffineTransform pageTransformation) throws PDFNetException {
|
||||
|
||||
float[] rgbComponents = filledRectangle.color().getRGBColorComponents(null);
|
||||
Rectangle2D r = filledRectangle.rectangle2D();
|
||||
|
||||
r = transformRect(r, pageTransformation);
|
||||
|
||||
Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
|
||||
|
||||
rect.setPathFill(true);
|
||||
@ -237,11 +245,13 @@ public class VisualizationWriter {
|
||||
}
|
||||
|
||||
|
||||
private void drawColoredRectangle(ColoredRectangle coloredRectangle) throws PDFNetException {
|
||||
private void drawColoredRectangle(ColoredRectangle coloredRectangle, AffineTransform pageTransformation) throws PDFNetException {
|
||||
|
||||
float[] rgbComponents = coloredRectangle.color().getRGBColorComponents(null);
|
||||
Rectangle2D r = coloredRectangle.rectangle2D();
|
||||
|
||||
r = transformRect(r, pageTransformation);
|
||||
|
||||
Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
|
||||
|
||||
rect.setPathStroke(true);
|
||||
@ -257,11 +267,19 @@ public class VisualizationWriter {
|
||||
}
|
||||
|
||||
|
||||
private void drawColoredLine(ColoredLine coloredLine) throws PDFNetException {
|
||||
private Rectangle2D transformRect(Rectangle2D r, AffineTransform pageTransformation) {
|
||||
|
||||
return pageTransformation.createTransformedShape(r).getBounds2D();
|
||||
}
|
||||
|
||||
|
||||
private void drawColoredLine(ColoredLine coloredLine, AffineTransform pageTransformation) throws PDFNetException {
|
||||
|
||||
float[] rgbComponents = coloredLine.color().getRGBColorComponents(null);
|
||||
Line2D l = coloredLine.line();
|
||||
|
||||
l = transformLine(pageTransformation, l);
|
||||
|
||||
builder.pathBegin();
|
||||
builder.moveTo(l.getX1(), l.getY1());
|
||||
builder.lineTo(l.getX2(), l.getY2());
|
||||
@ -279,8 +297,15 @@ public class VisualizationWriter {
|
||||
}
|
||||
|
||||
|
||||
private static Line2D transformLine(AffineTransform pageTransformation, Line2D line) {
|
||||
|
||||
return new Line2D.Double(pageTransformation.transform(line.getP1(), null), pageTransformation.transform(line.getP2(), null));
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static Matrix2D getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix) {
|
||||
@SuppressWarnings("PMD.CloseResource")
|
||||
private static Matrix2D getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) {
|
||||
|
||||
Matrix2D textMatrix;
|
||||
if (placedText.textMatrix().isEmpty()) {
|
||||
@ -289,7 +314,12 @@ public class VisualizationWriter {
|
||||
var matrix = placedText.textMatrix().get();
|
||||
textMatrix = toMatrix2D(matrix);
|
||||
}
|
||||
return textMatrix;
|
||||
try (var pageMatrix = toMatrix2D(pageTransformation)) {
|
||||
return pageMatrix.multiply(textMatrix);
|
||||
} finally {
|
||||
textMatrix.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user