Compare commits

...

8 Commits

Author SHA1 Message Date
Dominique Eifländer
ca69d3f5dc Merge branch 'RED-9760-4.1' into 'release/0.142.x'
RED-9760: Fixed missing newLines

See merge request fforesight/layout-parser!185
2024-07-30 16:06:30 +02:00
Dominique Eifländer
366c12bab0 RED-9760: Fixed missing newLines 2024-07-30 15:46:40 +02:00
Andrei Isvoran
4f3f72bcbc Merge branch 'RED-9607' into 'release/0.142.x'
RED-9607 - Correctly determine text position sequence based on file rotation

See merge request fforesight/layout-parser!183
2024-07-25 14:11:03 +02:00
Andrei Isvoran
6133b142cf RED-9607 - Correctly determine text position sequence based on file rotation 2024-07-24 16:32:54 +03:00
Kilian Schüttler
c3d24393ea Merge branch 'RED-8800-bp' into 'release/0.142.x'
Red 8800 bp

See merge request fforesight/layout-parser!180
2024-07-15 17:58:59 +02:00
Kilian Schüttler
d4500b879b Red 8800 bp 2024-07-15 17:58:59 +02:00
Andrei Isvoran
44277572ab Merge branch 'RED-9496-shutdown' into 'release/0.142.x'
RED-9496 - Implement graceful shutdown

See merge request fforesight/layout-parser!175
2024-07-04 14:25:58 +02:00
Andrei Isvoran
cbf809316b RED-9496 - Implement graceful shutdown 2024-07-04 14:19:19 +03:00
9 changed files with 120 additions and 27 deletions

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.processor.model.text;
import static com.knecon.fforesight.service.layoutparser.processor.utils.ParsingConstants.NEW_LINE_TEXT_HEIGHT_PERCENTAGE;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.List;
@ -208,7 +210,7 @@ public class TextPageBlock extends AbstractPageBlock {
TextPositionSequence previous = null;
for (TextPositionSequence word : sequences) {
if (previous != null) {
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE) {
sb.append('\n');
} else {
sb.append(' ');
@ -228,7 +230,7 @@ public class TextPageBlock extends AbstractPageBlock {
TextPositionSequence previous = null;
for (TextPositionSequence word : sequences) {
if (previous != null) {
if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight()) {
if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE) {
numberOfLines++;
}
}

View File

@ -1,5 +1,7 @@
package com.knecon.fforesight.service.layoutparser.processor.services.factory;
import static com.knecon.fforesight.service.layoutparser.processor.utils.ParsingConstants.NEW_LINE_TEXT_HEIGHT_PERCENTAGE;
import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
@ -67,7 +69,6 @@ public class SearchTextWithTextPositionFactory {
++context.stringIdx;
}
List<Rectangle2D> positions = sequences.stream()
.map(TextPositionSequence::getTextPositions)
.flatMap(Collection::stream)
@ -161,7 +162,7 @@ public class SearchTextWithTextPositionFactory {
}
double deltaY = Math.abs(currentPosition.getYDirAdj() - previousPosition.getYDirAdj());
return deltaY >= currentPosition.getHeightDir();
return deltaY >= currentPosition.getHeightDir() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE;
}
@ -191,9 +192,9 @@ public class SearchTextWithTextPositionFactory {
float textHeight = sequence.getTextHeight() + HEIGHT_PADDING;
Rectangle2D rectangle2D = new Rectangle2D.Double(textPosition.getXDirAdj(),
textPosition.getYDirAdj() - textHeight,
textPosition.getWidthDirAdj(),
textHeight + HEIGHT_PADDING);
textPosition.getYDirAdj() - textHeight,
textPosition.getWidthDirAdj(),
textHeight + HEIGHT_PADDING);
AffineTransform transform = new AffineTransform();

View File

@ -82,6 +82,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
private int pageRotation;
private PDRectangle pageSize;
private Matrix translateMatrix;
private final GlyphList glyphList;
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
@ -133,6 +134,12 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
this.pageRotation = page.getRotation();
this.pageSize = page.getCropBox();
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) {
translateMatrix = null;
} else {
// translation matrix for cropbox
translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
}
super.processPage(page);
}
@ -257,13 +264,22 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
return;
}
}
// adjust for cropbox if needed
Matrix translatedTextRenderingMatrix;
if (translateMatrix == null) {
translatedTextRenderingMatrix = textRenderingMatrix;
} else {
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
nextX -= pageSize.getLowerLeftX();
nextY -= pageSize.getLowerLeftY();
}
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
if (unicodeMapping.length() == 2) {
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
textRenderingMatrix,
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
@ -277,7 +293,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
textRenderingMatrix,
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
@ -293,7 +309,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
textRenderingMatrix,
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),

View File

@ -0,0 +1,7 @@
package com.knecon.fforesight.service.layoutparser.processor.utils;
/**
 * Holder for constants shared by the layout parsing code.
 *
 * <p>This class only exposes static constants; it is final and cannot be instantiated.
 */
public final class ParsingConstants {

    /**
     * Factor applied to a text height before it is compared with the vertical offset between
     * two consecutive text elements: an offset larger than {@code height * factor} is treated
     * as the start of a new line. Despite the name this is a fraction (0.6 = 60 %), not a
     * value in the range 0-100.
     */
    public static final float NEW_LINE_TEXT_HEIGHT_PERCENTAGE = 0.6f;


    private ParsingConstants() {
        // constants holder - no instances
    }

}

View File

@ -51,13 +51,47 @@ public class TextPositionSequenceComparator implements Comparator<TextPositionSe
double yDifference = Math.abs(pos1YBottom - pos2YBottom);
// we will do a simple tolerance comparison
if (yDifference < .1 || pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) {
return Double.compare(x1, x2);
} else if (pos1YBottom < pos2YBottom) {
return -1;
} else {
return 1;
// Adjust for text rotation
switch (pos1.getRotation()) {
case 0:
// 0 degrees (horizontal, top to bottom and left to right): Sort primarily by y-coordinates from top to bottom (pos1YBottom < pos2YBottom).
if (yDifference < .1 || (pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom) || (pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom)) {
return Double.compare(x1, x2);
} else if (pos1YBottom < pos2YBottom) {
return -1;
} else {
return 1;
}
case 90:
// 90 degrees (vertical, right to left): Sort by x-coordinates first (x1 > x2), then by y-coordinates from top to bottom (pos1YBottom < pos2YBottom).
if (x1 > x2) {
return -1;
} else if (x1 < x2) {
return 1;
} else {
return Double.compare(pos1YBottom, pos2YBottom);
}
case 180:
// 180 degrees (horizontal, bottom to top and right to left): Sort primarily by y-coordinates from bottom to top (pos1YBottom > pos2YBottom).
if (yDifference < .1 || (pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom) || (pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom)) {
return Double.compare(x2, x1);
} else if (pos1YBottom > pos2YBottom) {
return -1;
} else {
return 1;
}
case 270:
// 270 degrees (vertical, left to right): Sort by x-coordinates in reverse (x2 > x1), then by y-coordinates from bottom to top (pos2YBottom > pos1YBottom).
if (x2 > x1) {
return -1;
} else if (x2 < x1) {
return 1;
} else {
return Double.compare(pos2YBottom, pos1YBottom);
}
default:
throw new RuntimeException("Rotation not supported. Only 0/90/180/270 degree rotation is supported.");
}
}

View File

@ -32,6 +32,7 @@ dependencies {
implementation("com.iqser.red.commons:storage-commons:2.45.0")
implementation("com.knecon.fforesight:tenant-commons:0.21.0")
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
implementation("com.knecon.fforesight:lifecycle-commons:0.6.0")
implementation("org.springframework.boot:spring-boot-starter-actuator:${springBootStarterVersion}")
implementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}")
implementation("com.amazonaws:aws-java-sdk-s3:1.12.536")

View File

@ -6,19 +6,22 @@ import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.EnableAspectJAutoProxy;
import org.springframework.context.annotation.Import;
import com.amazonaws.services.s3.model.metrics.MetricsConfiguration;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.knecon.fforesight.lifecyclecommons.LifecycleAutoconfiguration;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
import io.micrometer.observation.ObservationRegistry;
import io.micrometer.observation.aop.ObservedAspect;
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class})
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, LifecycleAutoconfiguration.class})
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, LayoutParsingServiceProcessorConfiguration.class})
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
@EnableAspectJAutoProxy
public class Application {
public static void main(String[] args) {

View File

@ -15,6 +15,9 @@ project.version: 1.0-SNAPSHOT
server:
port: 8080
lifecycle:
base-package: com.knecon.fforesight.service.layoutparser
spring:
application:
name: layoutparser-service

View File

@ -1,6 +1,7 @@
package com.knecon.fforesight.service.viewerdoc.service;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileOutputStream;
@ -84,6 +85,7 @@ public class ViewerDocumentService {
pdPage.setContents(ContentStreamUtility.removeLayerFromContentStreams(allLayers, classifiers));
AffineTransform textDeRotationMatrix = getTextDeRotationTransform(pdPage);
AffineTransform pageTransformationMatrix = getPageTransformationMatrix(pdPage);
if (!ContentStreamClassifier.areAllContentStreamsEscaped(classifiers)) {
// We need to save the graphics state before, such that our appended content cannot be affected by previous content streams with side effects,
@ -106,7 +108,11 @@ public class ViewerDocumentService {
contentStream.saveGraphicsState();
drawVisualizationsToContentStream(pdDocument, visualization.getVisualizationsOnPages().get(pageNumber), contentStream, textDeRotationMatrix);
drawVisualizationsToContentStream(pdDocument,
visualization.getVisualizationsOnPages().get(pageNumber),
contentStream,
textDeRotationMatrix,
pageTransformationMatrix);
contentStream.restoreGraphicsState();
@ -133,6 +139,12 @@ public class ViewerDocumentService {
}
/**
 * Builds a pure translation by the lower-left corner of the page's crop box.
 *
 * @param pdPage page whose crop box origin is used as the translation offset
 * @return a translation-only transform (unit scale, no shear) by the crop box's lower-left x/y
 */
private AffineTransform getPageTransformationMatrix(PDPage pdPage) {
    var cropBox = pdPage.getCropBox();
    return AffineTransform.getTranslateInstance(cropBox.getLowerLeftX(), cropBox.getLowerLeftY());
}
private static Map<ContentStreams.Identifier, PDOptionalContentGroup> addLayersToDocument(List<Visualizations> visualizations, PDDocument pdDocument) {
Map<ContentStreams.Identifier, PDOptionalContentGroup> optionalContentGroupMap = new HashMap<>();
@ -147,7 +159,8 @@ public class ViewerDocumentService {
private static void drawVisualizationsToContentStream(PDDocument pdDocument,
VisualizationsOnPage visualizationsOnPage,
PDPageContentStream contentStream,
AffineTransform textDeRotationMatrix) throws IOException {
AffineTransform textDeRotationMatrix,
AffineTransform pageTransformationMatrix) throws IOException {
if (visualizationsOnPage.isMakePathsInvisible()) {
contentStream.addRect(0, 0, 1, 1);
@ -155,17 +168,18 @@ public class ViewerDocumentService {
}
for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) {
Line2D line = transformLine(coloredLine.line(), pageTransformationMatrix);
contentStream.setLineWidth(coloredLine.lineWidth());
contentStream.setStrokingColor(coloredLine.color());
contentStream.moveTo((float) coloredLine.line().getX1(), (float) coloredLine.line().getY1());
contentStream.lineTo((float) coloredLine.line().getX2(), (float) coloredLine.line().getY2());
contentStream.moveTo((float) line.getX1(), (float) line.getY1());
contentStream.lineTo((float) line.getX2(), (float) line.getY2());
contentStream.stroke();
}
for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) {
contentStream.setLineWidth(coloredRectangle.lineWidth());
contentStream.setStrokingColor(coloredRectangle.color());
Rectangle2D r = coloredRectangle.rectangle2D();
Rectangle2D r = transformRect(coloredRectangle.rectangle2D(), pageTransformationMatrix);
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
contentStream.stroke();
}
@ -175,7 +189,7 @@ public class ViewerDocumentService {
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
graphicsState.setNonStrokingAlphaConstant(filledRectangle.alpha());
contentStream.setGraphicsStateParameters(graphicsState);
Rectangle2D r = filledRectangle.rectangle2D();
Rectangle2D r = transformRect(filledRectangle.rectangle2D(), pageTransformationMatrix);
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
contentStream.fill();
}
@ -190,7 +204,7 @@ public class ViewerDocumentService {
} else {
contentStream.setRenderingMode(RenderingMode.FILL);
}
Matrix textMatrix = getTextMatrix(placedText, textDeRotationMatrix);
Matrix textMatrix = getTextMatrix(placedText, textDeRotationMatrix, pageTransformationMatrix);
contentStream.setTextMatrix(textMatrix);
contentStream.showText(placedText.text());
contentStream.endText();
@ -223,7 +237,7 @@ public class ViewerDocumentService {
}
private static Matrix getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix) {
private static Matrix getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix, AffineTransform pageTransformationMatrix) {
Matrix textMatrix;
if (placedText.textMatrix().isEmpty()) {
@ -236,7 +250,7 @@ public class ViewerDocumentService {
} else {
textMatrix = placedText.textMatrix().get();
}
return textMatrix;
return new Matrix(pageTransformationMatrix).multiply(textMatrix);
}
@ -313,4 +327,16 @@ public class ViewerDocumentService {
});
}
/**
 * Applies the given transform to both end points of a line.
 *
 * @param line               line to transform; it is not modified
 * @param pageTransformation transform applied to each end point
 * @return a new line connecting the transformed start and end points
 */
private static Line2D transformLine(Line2D line, AffineTransform pageTransformation) {
    var start = pageTransformation.transform(line.getP1(), null);
    var end = pageTransformation.transform(line.getP2(), null);
    return new Line2D.Double(start, end);
}
/**
 * Transforms a rectangle and returns the bounding box of the result.
 *
 * @param r                  rectangle to transform; it is not modified
 * @param pageTransformation transform applied to the rectangle's outline
 * @return the axis-aligned bounds of the transformed outline
 */
private static Rectangle2D transformRect(Rectangle2D r, AffineTransform pageTransformation) {
    var transformedOutline = pageTransformation.createTransformedShape(r);
    return transformedOutline.getBounds2D();
}
}