Compare commits
8 Commits
main
...
release/0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca69d3f5dc | ||
|
|
366c12bab0 | ||
|
|
4f3f72bcbc | ||
|
|
6133b142cf | ||
|
|
c3d24393ea | ||
|
|
d4500b879b | ||
|
|
44277572ab | ||
|
|
cbf809316b |
@ -1,5 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.text;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.ParsingConstants.NEW_LINE_TEXT_HEIGHT_PERCENTAGE;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@ -208,7 +210,7 @@ public class TextPageBlock extends AbstractPageBlock {
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sequences) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
@ -228,7 +230,7 @@ public class TextPageBlock extends AbstractPageBlock {
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sequences) {
|
||||
if (previous != null) {
|
||||
if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight()) {
|
||||
if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE) {
|
||||
numberOfLines++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.factory;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.ParsingConstants.NEW_LINE_TEXT_HEIGHT_PERCENTAGE;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
@ -67,7 +69,6 @@ public class SearchTextWithTextPositionFactory {
|
||||
++context.stringIdx;
|
||||
}
|
||||
|
||||
|
||||
List<Rectangle2D> positions = sequences.stream()
|
||||
.map(TextPositionSequence::getTextPositions)
|
||||
.flatMap(Collection::stream)
|
||||
@ -161,7 +162,7 @@ public class SearchTextWithTextPositionFactory {
|
||||
}
|
||||
|
||||
double deltaY = Math.abs(currentPosition.getYDirAdj() - previousPosition.getYDirAdj());
|
||||
return deltaY >= currentPosition.getHeightDir();
|
||||
return deltaY >= currentPosition.getHeightDir() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE;
|
||||
}
|
||||
|
||||
|
||||
@ -191,9 +192,9 @@ public class SearchTextWithTextPositionFactory {
|
||||
|
||||
float textHeight = sequence.getTextHeight() + HEIGHT_PADDING;
|
||||
Rectangle2D rectangle2D = new Rectangle2D.Double(textPosition.getXDirAdj(),
|
||||
textPosition.getYDirAdj() - textHeight,
|
||||
textPosition.getWidthDirAdj(),
|
||||
textHeight + HEIGHT_PADDING);
|
||||
textPosition.getYDirAdj() - textHeight,
|
||||
textPosition.getWidthDirAdj(),
|
||||
textHeight + HEIGHT_PADDING);
|
||||
|
||||
AffineTransform transform = new AffineTransform();
|
||||
|
||||
|
||||
@ -82,6 +82,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
|
||||
private int pageRotation;
|
||||
private PDRectangle pageSize;
|
||||
private Matrix translateMatrix;
|
||||
private final GlyphList glyphList;
|
||||
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
|
||||
|
||||
@ -133,6 +134,12 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
this.pageRotation = page.getRotation();
|
||||
this.pageSize = page.getCropBox();
|
||||
|
||||
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) {
|
||||
translateMatrix = null;
|
||||
} else {
|
||||
// translation matrix for cropbox
|
||||
translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
|
||||
}
|
||||
super.processPage(page);
|
||||
}
|
||||
|
||||
@ -257,13 +264,22 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// adjust for cropbox if needed
|
||||
Matrix translatedTextRenderingMatrix;
|
||||
if (translateMatrix == null) {
|
||||
translatedTextRenderingMatrix = textRenderingMatrix;
|
||||
} else {
|
||||
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
|
||||
nextX -= pageSize.getLowerLeftX();
|
||||
nextY -= pageSize.getLowerLeftY();
|
||||
}
|
||||
|
||||
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
|
||||
if (unicodeMapping.length() == 2) {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
@ -277,7 +293,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
@ -293,7 +309,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
|
||||
@ -0,0 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
/**
 * Shared numeric thresholds used while parsing page text.
 *
 * <p>This class only holds constants; it cannot be instantiated or extended.
 */
public final class ParsingConstants {

    /**
     * Factor applied to a text height when deciding whether two consecutive
     * words or text positions are on different lines: the vertical distance
     * between them is compared against {@code textHeight * NEW_LINE_TEXT_HEIGHT_PERCENTAGE}.
     *
     * <p>Despite the name, the value is a fraction (0.6 means 60 percent of the text height).
     */
    public static final float NEW_LINE_TEXT_HEIGHT_PERCENTAGE = 0.6f;


    private ParsingConstants() {
        // constants holder, no instances
    }

}
|
||||
@ -51,13 +51,47 @@ public class TextPositionSequenceComparator implements Comparator<TextPositionSe
|
||||
|
||||
double yDifference = Math.abs(pos1YBottom - pos2YBottom);
|
||||
|
||||
// we will do a simple tolerance comparison
|
||||
if (yDifference < .1 || pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) {
|
||||
return Double.compare(x1, x2);
|
||||
} else if (pos1YBottom < pos2YBottom) {
|
||||
return -1;
|
||||
} else {
|
||||
return 1;
|
||||
// Adjust for text rotation
|
||||
switch (pos1.getRotation()) {
|
||||
case 0:
|
||||
// 0 degrees (horizontal, top to bottom and left to right): Sort primarily by y-coordinates from top to bottom (pos1YBottom < pos2YBottom).
|
||||
if (yDifference < .1 || (pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom) || (pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom)) {
|
||||
return Double.compare(x1, x2);
|
||||
} else if (pos1YBottom < pos2YBottom) {
|
||||
return -1;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
case 90:
|
||||
// 90 degrees (vertical, right to left): Sort by x-coordinates first (x1 > x2), then by y-coordinates from top to bottom (pos1YBottom < pos2YBottom).
|
||||
if (x1 > x2) {
|
||||
return -1;
|
||||
} else if (x1 < x2) {
|
||||
return 1;
|
||||
} else {
|
||||
return Double.compare(pos1YBottom, pos2YBottom);
|
||||
}
|
||||
case 180:
|
||||
// 180 degrees (horizontal, bottom to top and right to left): Sort primarily by y-coordinates from bottom to top (pos1YBottom > pos2YBottom).
|
||||
if (yDifference < .1 || (pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom) || (pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom)) {
|
||||
return Double.compare(x2, x1);
|
||||
|
||||
} else if (pos1YBottom > pos2YBottom) {
|
||||
return -1;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
case 270:
|
||||
// 270 degrees (vertical, left to right): Sort by x-coordinates in reverse (x2 > x1), then by y-coordinates from bottom to top (pos2YBottom > pos1YBottom).
|
||||
if (x2 > x1) {
|
||||
return -1;
|
||||
} else if (x2 < x1) {
|
||||
return 1;
|
||||
} else {
|
||||
return Double.compare(pos2YBottom, pos1YBottom);
|
||||
}
|
||||
default:
|
||||
throw new RuntimeException("Rotation not supported. Only 0/90/180/270 degree rotation is supported.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -32,6 +32,7 @@ dependencies {
|
||||
implementation("com.iqser.red.commons:storage-commons:2.45.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.21.0")
|
||||
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
|
||||
implementation("com.knecon.fforesight:lifecycle-commons:0.6.0")
|
||||
implementation("org.springframework.boot:spring-boot-starter-actuator:${springBootStarterVersion}")
|
||||
implementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}")
|
||||
implementation("com.amazonaws:aws-java-sdk-s3:1.12.536")
|
||||
|
||||
@ -6,19 +6,22 @@ import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.EnableAspectJAutoProxy;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import com.amazonaws.services.s3.model.metrics.MetricsConfiguration;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.knecon.fforesight.lifecyclecommons.LifecycleAutoconfiguration;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
|
||||
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
|
||||
|
||||
import io.micrometer.observation.ObservationRegistry;
|
||||
import io.micrometer.observation.aop.ObservedAspect;
|
||||
|
||||
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class})
|
||||
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, LifecycleAutoconfiguration.class})
|
||||
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, LayoutParsingServiceProcessorConfiguration.class})
|
||||
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
|
||||
@EnableAspectJAutoProxy
|
||||
public class Application {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
@ -15,6 +15,9 @@ project.version: 1.0-SNAPSHOT
|
||||
server:
|
||||
port: 8080
|
||||
|
||||
lifecycle:
|
||||
base-package: com.knecon.fforesight.service.layoutparser
|
||||
|
||||
spring:
|
||||
application:
|
||||
name: layoutparser-service
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.knecon.fforesight.service.viewerdoc.service;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
@ -84,6 +85,7 @@ public class ViewerDocumentService {
|
||||
pdPage.setContents(ContentStreamUtility.removeLayerFromContentStreams(allLayers, classifiers));
|
||||
|
||||
AffineTransform textDeRotationMatrix = getTextDeRotationTransform(pdPage);
|
||||
AffineTransform pageTransformationMatrix = getPageTransformationMatrix(pdPage);
|
||||
|
||||
if (!ContentStreamClassifier.areAllContentStreamsEscaped(classifiers)) {
|
||||
// We need to save the graphics state before, such that our appended content cannot be affected by previous content streams with side effects,
|
||||
@ -106,7 +108,11 @@ public class ViewerDocumentService {
|
||||
|
||||
contentStream.saveGraphicsState();
|
||||
|
||||
drawVisualizationsToContentStream(pdDocument, visualization.getVisualizationsOnPages().get(pageNumber), contentStream, textDeRotationMatrix);
|
||||
drawVisualizationsToContentStream(pdDocument,
|
||||
visualization.getVisualizationsOnPages().get(pageNumber),
|
||||
contentStream,
|
||||
textDeRotationMatrix,
|
||||
pageTransformationMatrix);
|
||||
|
||||
contentStream.restoreGraphicsState();
|
||||
|
||||
@ -133,6 +139,12 @@ public class ViewerDocumentService {
|
||||
}
|
||||
|
||||
|
||||
private AffineTransform getPageTransformationMatrix(PDPage pdPage) {
|
||||
|
||||
return new AffineTransform(1, 0, 0, 1, pdPage.getCropBox().getLowerLeftX(), pdPage.getCropBox().getLowerLeftY());
|
||||
}
|
||||
|
||||
|
||||
private static Map<ContentStreams.Identifier, PDOptionalContentGroup> addLayersToDocument(List<Visualizations> visualizations, PDDocument pdDocument) {
|
||||
|
||||
Map<ContentStreams.Identifier, PDOptionalContentGroup> optionalContentGroupMap = new HashMap<>();
|
||||
@ -147,7 +159,8 @@ public class ViewerDocumentService {
|
||||
private static void drawVisualizationsToContentStream(PDDocument pdDocument,
|
||||
VisualizationsOnPage visualizationsOnPage,
|
||||
PDPageContentStream contentStream,
|
||||
AffineTransform textDeRotationMatrix) throws IOException {
|
||||
AffineTransform textDeRotationMatrix,
|
||||
AffineTransform pageTransformationMatrix) throws IOException {
|
||||
|
||||
if (visualizationsOnPage.isMakePathsInvisible()) {
|
||||
contentStream.addRect(0, 0, 1, 1);
|
||||
@ -155,17 +168,18 @@ public class ViewerDocumentService {
|
||||
}
|
||||
|
||||
for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) {
|
||||
Line2D line = transformLine(coloredLine.line(), pageTransformationMatrix);
|
||||
contentStream.setLineWidth(coloredLine.lineWidth());
|
||||
contentStream.setStrokingColor(coloredLine.color());
|
||||
contentStream.moveTo((float) coloredLine.line().getX1(), (float) coloredLine.line().getY1());
|
||||
contentStream.lineTo((float) coloredLine.line().getX2(), (float) coloredLine.line().getY2());
|
||||
contentStream.moveTo((float) line.getX1(), (float) line.getY1());
|
||||
contentStream.lineTo((float) line.getX2(), (float) line.getY2());
|
||||
contentStream.stroke();
|
||||
}
|
||||
|
||||
for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) {
|
||||
contentStream.setLineWidth(coloredRectangle.lineWidth());
|
||||
contentStream.setStrokingColor(coloredRectangle.color());
|
||||
Rectangle2D r = coloredRectangle.rectangle2D();
|
||||
Rectangle2D r = transformRect(coloredRectangle.rectangle2D(), pageTransformationMatrix);
|
||||
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
|
||||
contentStream.stroke();
|
||||
}
|
||||
@ -175,7 +189,7 @@ public class ViewerDocumentService {
|
||||
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
|
||||
graphicsState.setNonStrokingAlphaConstant(filledRectangle.alpha());
|
||||
contentStream.setGraphicsStateParameters(graphicsState);
|
||||
Rectangle2D r = filledRectangle.rectangle2D();
|
||||
Rectangle2D r = transformRect(filledRectangle.rectangle2D(), pageTransformationMatrix);
|
||||
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
|
||||
contentStream.fill();
|
||||
}
|
||||
@ -190,7 +204,7 @@ public class ViewerDocumentService {
|
||||
} else {
|
||||
contentStream.setRenderingMode(RenderingMode.FILL);
|
||||
}
|
||||
Matrix textMatrix = getTextMatrix(placedText, textDeRotationMatrix);
|
||||
Matrix textMatrix = getTextMatrix(placedText, textDeRotationMatrix, pageTransformationMatrix);
|
||||
contentStream.setTextMatrix(textMatrix);
|
||||
contentStream.showText(placedText.text());
|
||||
contentStream.endText();
|
||||
@ -223,7 +237,7 @@ public class ViewerDocumentService {
|
||||
}
|
||||
|
||||
|
||||
private static Matrix getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix) {
|
||||
private static Matrix getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix, AffineTransform pageTransformationMatrix) {
|
||||
|
||||
Matrix textMatrix;
|
||||
if (placedText.textMatrix().isEmpty()) {
|
||||
@ -236,7 +250,7 @@ public class ViewerDocumentService {
|
||||
} else {
|
||||
textMatrix = placedText.textMatrix().get();
|
||||
}
|
||||
return textMatrix;
|
||||
return new Matrix(pageTransformationMatrix).multiply(textMatrix);
|
||||
}
|
||||
|
||||
|
||||
@ -313,4 +327,16 @@ public class ViewerDocumentService {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private static Line2D transformLine(Line2D line, AffineTransform pageTransformation) {
|
||||
|
||||
return new Line2D.Double(pageTransformation.transform(line.getP1(), null), pageTransformation.transform(line.getP2(), null));
|
||||
}
|
||||
|
||||
|
||||
private static Rectangle2D transformRect(Rectangle2D r, AffineTransform pageTransformation) {
|
||||
|
||||
return pageTransformation.createTransformedShape(r).getBounds2D();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user