Merge branch 'RED-10127' into 'main'
RED-10127: improve headline detection by fitting BBoxes tightly and therefore... See merge request fforesight/azure-ocr-service!17
This commit is contained in:
commit
6845afb1dd
@ -0,0 +1,11 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
/**
 * Optional processing features that can be requested per document; carried as a
 * {@code Set<AzureOcrFeature>} in {@code DocumentRequest.features}.
 */
public enum AzureOcrFeature {
|
||||
|
||||
// NOTE(review): no usage visible in this change set; presumably corrects page/text rotation — confirm.
ROTATION_CORRECTION,
|
||||
// When present, AzureOcrResource selects the "prebuilt-layout" model instead of "prebuilt-read"
// and additionally requests the KEY_VALUE_PAIRS analysis feature.
IDP,
|
||||
// NOTE(review): no usage visible here; presumably replaces the former fontStyleDetection setting — confirm.
FONT_STYLE_DETECTION,
|
||||
// When present, every page is selected for OCR; otherwise pages without detected images are skipped.
ALL_PAGES,
|
||||
// When present, OCRService removes watermarks from the document before running OCR.
// The legacy DocumentRequest(dossierId, fileId, removeWatermark) constructor maps its flag to this value.
REMOVE_WATERMARKS
|
||||
|
||||
}
|
||||
@ -1,6 +1,8 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -18,12 +20,13 @@ public class DocumentRequest {
|
||||
|
||||
String dossierId;
|
||||
String fileId;
|
||||
boolean removeWatermark;
|
||||
|
||||
String originDocumentId;
|
||||
String viewerDocId;
|
||||
String idpResultId;
|
||||
|
||||
boolean removeWatermark;
|
||||
Set<AzureOcrFeature> features;
|
||||
|
||||
|
||||
public DocumentRequest(String dossierId, String fileId) {
|
||||
@ -33,18 +36,23 @@ public class DocumentRequest {
|
||||
originDocumentId = null;
|
||||
viewerDocId = null;
|
||||
idpResultId = null;
|
||||
removeWatermark = false;
|
||||
features = Collections.emptySet();
|
||||
}
|
||||
|
||||
|
||||
// needed for backwards compatibility
|
||||
public DocumentRequest(String dossierId, String fileId, boolean removeWatermark) {
|
||||
|
||||
this.dossierId = dossierId;
|
||||
this.fileId = fileId;
|
||||
this.removeWatermark = removeWatermark;
|
||||
originDocumentId = null;
|
||||
viewerDocId = null;
|
||||
idpResultId = null;
|
||||
if (removeWatermark) {
|
||||
features = Set.of(AzureOcrFeature.REMOVE_WATERMARKS);
|
||||
} else {
|
||||
features = Collections.emptySet();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -9,19 +9,61 @@ import java.util.stream.Stream;
|
||||
|
||||
public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
|
||||
|
||||
/*
|
||||
B _____ C
|
||||
| |
|
||||
A|_____|D
|
||||
*/
|
||||
/**
 * Orientation of a QuadPoint, expressed by where its corners a, b, c, d lie.
 * The ASCII diagram FOLLOWING each constant shows the corner layout for that constant;
 * the layouts match the corner order produced by fromRectangle2D(Rectangle2D, Direction),
 * where "upper" corners are the ones with the larger y value (getMaxY).
 * getDirection() derives the value from the edge a -> d.
 */
public enum Direction {
|
||||
RIGHT,
|
||||
/*
|
||||
B _____ C
|
||||
| |
|
||||
A|_____|D
|
||||
*/
|
||||
DOWN,
|
||||
/*
|
||||
* A _____ B
|
||||
* | |
|
||||
* D|_____|C
|
||||
*/
|
||||
LEFT,
|
||||
/*
|
||||
* D _____ A
|
||||
* | |
|
||||
* C|_____|B
|
||||
* */
|
||||
UP,
|
||||
/*
|
||||
* C _____ D
|
||||
* | |
|
||||
* B|_____|A
|
||||
*/
|
||||
NONE
|
||||
/*
|
||||
* ? _____ ?
|
||||
* | |
|
||||
* ?|_____|?
|
||||
*/
|
||||
}
|
||||
private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold
|
||||
|
||||
|
||||
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) {
|
||||
|
||||
return new QuadPoint(new Point2D.Double(rectangle2D.getX(), rectangle2D.getY()),
|
||||
new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY()),
|
||||
new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY()),
|
||||
new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY()));
|
||||
return fromRectangle2D(rectangle2D, Direction.NONE);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds a QuadPoint from an axis-aligned rectangle, assigning the rectangle's corners to
 * a, b, c, d according to the requested direction.
 *
 * @param rectangle2D the rectangle whose four corners are used
 * @param direction   the orientation that decides which corner becomes a, b, c and d;
 *                    RIGHT and NONE both use the default order (lowerLeft, upperLeft, upperRight, lowerRight)
 * @return a new QuadPoint with the corners in the order belonging to {@code direction}
 */
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D, Direction direction) {
|
||||
|
||||
// "lower" = getY(), "upper" = getMaxY(); "left" = getX(), "right" = getMaxX()
var lowerLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getY());
|
||||
var upperLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY());
|
||||
var upperRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY());
|
||||
var lowerRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY());
|
||||
|
||||
// each case rotates the corner assignment by one position relative to the previous one
return switch (direction) {
|
||||
case DOWN -> new QuadPoint(upperLeft, upperRight, lowerRight, lowerLeft);
|
||||
case LEFT -> new QuadPoint(upperRight, lowerRight, lowerLeft, upperLeft);
|
||||
case UP -> new QuadPoint(lowerRight, lowerLeft, upperLeft, upperRight);
|
||||
default -> new QuadPoint(lowerLeft, upperLeft, upperRight, lowerRight);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -56,6 +98,35 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Checks whether this QuadPoint is (approximately) horizontal.
 *
 * @return true if the absolute angle of edge a -> d OR of edge b -> c, as returned by
 *         calculateAngle, is at most THRESHOLD_ANGLE (5 degrees in radians)
 */
public boolean isHorizontal() {
|
||||
|
||||
double angle = calculateAngle(a, d);
|
||||
double angle2 = calculateAngle(b, c);
|
||||
// one matching edge is sufficient
// NOTE(review): if calculateAngle returns a full-circle angle (e.g. via atan2), an a -> d edge pointing
// in negative x direction yields about +-PI and fails this check, which would make Direction.LEFT
// unreachable in getDirection() — confirm against calculateAngle.
return Math.abs(angle) <= THRESHOLD_ANGLE || Math.abs(angle2) <= THRESHOLD_ANGLE;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Checks whether this QuadPoint is (approximately) vertical.
 *
 * @return true if the absolute angle of edge a -> d OR of edge b -> c, as returned by
 *         calculateAngle, deviates from PI/2 by at most THRESHOLD_ANGLE (5 degrees in radians)
 */
public boolean isVertical() {
|
||||
|
||||
double rightAngle = Math.PI / 2;
|
||||
double angle = calculateAngle(a, d);
|
||||
double angle2 = calculateAngle(b, c);
|
||||
// the absolute value of the angle is used, so the sign of the angle does not matter; one matching edge is sufficient
return Math.abs(rightAngle - Math.abs(angle)) <= THRESHOLD_ANGLE || Math.abs(rightAngle - Math.abs(angle2)) <= THRESHOLD_ANGLE;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Classifies the orientation of this QuadPoint by comparing corners a and d.
 * The horizontal check runs first, so a QuadPoint that passes both checks is reported as RIGHT or LEFT.
 *
 * @return RIGHT or LEFT if isHorizontal() (RIGHT when a lies left of d, i.e. a.x &lt; d.x),
 *         UP or DOWN if isVertical() (UP when a.y &lt; d.y),
 *         NONE if neither check passes
 */
public Direction getDirection() {
|
||||
|
||||
if (isHorizontal()) {
|
||||
return a.getX() < d.getX() ? Direction.RIGHT : Direction.LEFT;
|
||||
}
|
||||
if (isVertical()) {
|
||||
return a.getY() < d.getY() ? Direction.UP : Direction.DOWN;
|
||||
}
|
||||
// neither within the horizontal nor within the vertical threshold
return Direction.NONE;
|
||||
}
|
||||
|
||||
|
||||
public Stream<Line2D> asLines() {
|
||||
|
||||
return Stream.of(new Line2D.Double(a(), b()), new Line2D.Double(b(), c()), new Line2D.Double(c(), d()), new Line2D.Double(d(), a()));
|
||||
@ -63,7 +134,7 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
|
||||
}
|
||||
|
||||
|
||||
public QuadPointData data() {
|
||||
public QuadPointData toData() {
|
||||
|
||||
return new QuadPointData(new float[]{(float) a.getX(), (float) a.getY(), (float) b.getX(), (float) b.getY(), (float) c.getX(), (float) c.getY(), (float) d.getX(), (float) d.getY()});
|
||||
}
|
||||
@ -134,13 +205,19 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
|
||||
}
|
||||
|
||||
|
||||
public double size() {
|
||||
public double getRectangularSize() {
|
||||
|
||||
return a().distance(b()) * a().distance(d());
|
||||
}
|
||||
|
||||
|
||||
public double angle() {
|
||||
public double getAngle() {
|
||||
|
||||
return calculateAngle(a, d);
|
||||
}
|
||||
|
||||
|
||||
private static double calculateAngle(Point2D a, Point2D d) {
|
||||
|
||||
double deltaY = d.getY() - a.getY();
|
||||
double deltaX = d.getX() - a.getX();
|
||||
|
||||
@ -17,10 +17,8 @@ public class OcrServiceSettings {
|
||||
int batchSize = 128;
|
||||
|
||||
boolean debug; // writes the ocr layer visibly to the viewer doc pdf
|
||||
boolean idpEnabled; // Enables table detection, paragraph classification, section detection, key-value detection.
|
||||
boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
|
||||
boolean processAllPages; // if this parameter is set, ocr will be performed on any page, regardless if it has images or not
|
||||
boolean fontStyleDetection; // Enables bold detection using ghostscript and leptonica
|
||||
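boolean snuggify = true; // NOTE(review): presumably enables fitting the OCR bounding boxes tightly ("snugly") around the text; the previous comment was copied from fontStyleDetection - confirm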
|
||||
String contentFormat; // Either markdown or text. But, for whatever reason, with markdown enabled, key-values are not written by azure....
|
||||
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@ import com.pdftron.pdf.PDFNet;
|
||||
import com.sun.jna.NativeLibrary;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -14,6 +15,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class NativeLibrariesInitializer {
|
||||
|
||||
@Value("${pdftron.license:}")
|
||||
|
||||
@ -2,19 +2,32 @@ package com.knecon.fforesight.service.ocr.processor.model;
|
||||
|
||||
import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import com.azure.core.util.BinaryData;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Getter;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public final class PageBatch implements Comparable<PageBatch> {
|
||||
|
||||
@Getter
|
||||
int index;
|
||||
@NonNull
|
||||
List<Integer> lookup = new ArrayList<>();
|
||||
List<Integer> lookup;
|
||||
@NonNull
|
||||
@Getter
|
||||
Path batchDoc;
|
||||
@NonNull
|
||||
@Getter
|
||||
Path imagePipelineDir;
|
||||
|
||||
|
||||
@Override
|
||||
@ -34,12 +47,6 @@ public final class PageBatch implements Comparable<PageBatch> {
|
||||
}
|
||||
|
||||
|
||||
public void add(Integer pageNumber) {
|
||||
|
||||
lookup.add(pageNumber);
|
||||
}
|
||||
|
||||
|
||||
public void forEach(Consumer<? super Integer> consumer) {
|
||||
|
||||
lookup.forEach(consumer);
|
||||
@ -84,4 +91,10 @@ public final class PageBatch implements Comparable<PageBatch> {
|
||||
return Integer.compare(lookup.get(0), o.lookup.get(0));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Provides the batch PDF for upload.
 * No page content is rendered here; the method only wraps the already existing file at {@code batchDoc}.
 *
 * @return a BinaryData created from the file at {@code batchDoc}
 */
public BinaryData render() {
|
||||
|
||||
return BinaryData.fromFile(batchDoc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -3,7 +3,6 @@ package com.knecon.fforesight.service.ocr.processor.model;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Point2D;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.DocumentWord;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetrics;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
|
||||
@ -20,7 +19,8 @@ public class TextPositionInImage {
|
||||
|
||||
final QuadPoint position;
|
||||
final String text;
|
||||
final AffineTransform imageCTM;
|
||||
final AffineTransform resultToPageTransform;
|
||||
final boolean snugBBox;
|
||||
|
||||
@Setter
|
||||
boolean overlapsIgnoreZone;
|
||||
@ -30,33 +30,34 @@ public class TextPositionInImage {
|
||||
FontStyle fontStyle;
|
||||
|
||||
|
||||
public TextPositionInImage(DocumentWord word, AffineTransform imageCTM, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle) {
|
||||
public TextPositionInImage(QuadPoint position, String text, AffineTransform resultToPageTransform, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle, boolean snugBBox) {
|
||||
|
||||
this.position = QuadPoint.fromPolygons(word.getPolygon());
|
||||
this.text = word.getContent();
|
||||
this.imageCTM = imageCTM;
|
||||
this.position = position;
|
||||
this.text = text;
|
||||
this.resultToPageTransform = resultToPageTransform;
|
||||
this.fontMetricsProvider = fontMetricsProvider;
|
||||
this.fontStyle = fontStyle;
|
||||
this.snugBBox = snugBBox;
|
||||
}
|
||||
|
||||
|
||||
public QuadPoint getTransformedTextBBox() {
|
||||
|
||||
return position.getTransformed(imageCTM);
|
||||
return position.getTransformed(resultToPageTransform);
|
||||
}
|
||||
|
||||
|
||||
public AffineTransform getTextMatrix() {
|
||||
|
||||
FontMetrics metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
|
||||
FontMetrics metrics = getMetrics();
|
||||
|
||||
// Matrix multiplication is from right to left:
|
||||
// convert to image coords -> subtract descent -> scale height -> reverse resultToPageTransform scaling -> translate to coordinates in image -> convert to pdf coords
|
||||
// width must not be set, since it is scaled with the fontsize attribute
|
||||
double rotation = position.angle();
|
||||
double rotation = position.getAngle();
|
||||
Point2D anchor = new Point2D.Double(position.b().getX(), position.b().getY());
|
||||
AffineTransform ctm = new AffineTransform();
|
||||
ctm.concatenate(imageCTM);
|
||||
ctm.concatenate(resultToPageTransform);
|
||||
ctm.translate(anchor.getX(), anchor.getY());
|
||||
ctm.scale(getWidth() / getTransformedWidth(),
|
||||
getHeight() / getTransformedHeight()); // scale with transformation coefficient, such that fontsize may be set with transformed width.
|
||||
@ -69,6 +70,15 @@ public class TextPositionInImage {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Calculates the font metrics for this text from its transformed width and height,
 * choosing the calculation that matches the kind of bounding box.
 *
 * @return metrics from calculateMetricsForTightBBox when {@code snugBBox} is set,
 *         otherwise from calculateMetricsForAzureBBox
 */
private FontMetrics getMetrics() {
|
||||
|
||||
if (snugBBox) {
|
||||
return fontMetricsProvider.calculateMetricsForTightBBox(text, getTransformedWidth(), getTransformedHeight());
|
||||
}
|
||||
return fontMetricsProvider.calculateMetricsForAzureBBox(text, getTransformedWidth(), getTransformedHeight());
|
||||
}
|
||||
|
||||
|
||||
public double getFontSize() {
|
||||
// The fontsize as estimated by the word width
|
||||
return fontMetricsProvider.calculateFontSize(text, getTransformedWidth());
|
||||
@ -95,7 +105,7 @@ public class TextPositionInImage {
|
||||
|
||||
public double getFontSizeByHeight() {
|
||||
// The fontsize as estimated by the word height, only used for font style detection
|
||||
var metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
|
||||
var metrics = getMetrics();
|
||||
return fontMetricsProvider.calculateFontSize(text, getTransformedWidth()) * metrics.getHeightScaling();
|
||||
}
|
||||
|
||||
@ -108,25 +118,25 @@ public class TextPositionInImage {
|
||||
|
||||
public Point2D transformedA() {
|
||||
|
||||
return imageCTM.transform(position.a(), null);
|
||||
return resultToPageTransform.transform(position.a(), null);
|
||||
}
|
||||
|
||||
|
||||
public Point2D transformedB() {
|
||||
|
||||
return imageCTM.transform(position.b(), null);
|
||||
return resultToPageTransform.transform(position.b(), null);
|
||||
}
|
||||
|
||||
|
||||
public Point2D transformedC() {
|
||||
|
||||
return imageCTM.transform(position.c(), null);
|
||||
return resultToPageTransform.transform(position.c(), null);
|
||||
}
|
||||
|
||||
|
||||
public Point2D transformedD() {
|
||||
|
||||
return imageCTM.transform(position.d(), null);
|
||||
return resultToPageTransform.transform(position.d(), null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,28 +1,27 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.slf4j.MDC;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.AnalyzeResult;
|
||||
import com.azure.core.util.BinaryData;
|
||||
import com.azure.core.util.polling.LongRunningOperationStatus;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.LayerFactory;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.Optimizer;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import reactor.core.publisher.Mono;
|
||||
@ -35,16 +34,13 @@ public class AsyncOcrService {
|
||||
|
||||
AzureOcrResource azureOcrResource;
|
||||
OcrServiceSettings settings;
|
||||
ImageProcessingPipeline imageProcessingPipeline;
|
||||
ObjectMapper mapper;
|
||||
|
||||
|
||||
public OcrResult awaitOcr(PDFDoc pdfDoc,
|
||||
OcrExecutionSupervisor supervisor,
|
||||
Set<Integer> pagesWithImages,
|
||||
ImageProcessingSupervisor imageSupervisor) throws InterruptedException, PDFNetException {
|
||||
public OcrResult awaitOcr(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, List<PageBatch> batches) throws InterruptedException {
|
||||
|
||||
LayerFactory layerFactory = new LayerFactory(settings, supervisor, imageSupervisor, PageInformation.fromPDFDoc(pdfDoc));
|
||||
|
||||
List<PageBatch> batches = splitIntoBatches(pdfDoc, supervisor, pagesWithImages);
|
||||
LayerFactory layerFactory = new LayerFactory(settings, features, supervisor, PageInformation.fromPDFDoc(pdfDoc), imageProcessingPipeline);
|
||||
|
||||
for (PageBatch batch : batches) {
|
||||
|
||||
@ -57,12 +53,10 @@ public class AsyncOcrService {
|
||||
supervisor.requireNoErrors();
|
||||
|
||||
batchContext.batchStats().start();
|
||||
|
||||
BinaryData data = renderBatch(pdfDoc, batch);
|
||||
|
||||
BinaryData data = batch.render();
|
||||
batchContext.batchStats().batchRenderFinished();
|
||||
|
||||
beginAnalysis(data, batchContext);
|
||||
beginAnalysis(data, batchContext, features);
|
||||
}
|
||||
|
||||
supervisor.awaitAllPagesProcessed();
|
||||
@ -71,45 +65,17 @@ public class AsyncOcrService {
|
||||
}
|
||||
|
||||
|
||||
private static BinaryData renderBatch(PDFDoc pdfDoc, PageBatch batch) throws PDFNetException {
|
||||
|
||||
BinaryData docData;
|
||||
try (var smallerDoc = extractBatchDocument(pdfDoc, batch)) {
|
||||
Optimizer.optimize(smallerDoc);
|
||||
docData = BinaryData.fromBytes(smallerDoc.save(SDFDoc.SaveMode.LINEARIZED, null));
|
||||
}
|
||||
return docData;
|
||||
}
|
||||
|
||||
|
||||
private List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<Integer> pagesWithImages) throws PDFNetException {
|
||||
|
||||
List<PageBatch> batches = new ArrayList<>();
|
||||
PageBatch currentBatch = new PageBatch();
|
||||
batches.add(currentBatch);
|
||||
for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) {
|
||||
if (!settings.isProcessAllPages() && !pagesWithImages.contains(pageNumber)) {
|
||||
supervisor.logPageSkipped(pageNumber);
|
||||
continue;
|
||||
}
|
||||
currentBatch.add(pageNumber);
|
||||
if (currentBatch.size() == settings.getBatchSize()) {
|
||||
currentBatch = new PageBatch();
|
||||
batches.add(currentBatch);
|
||||
}
|
||||
}
|
||||
return batches;
|
||||
}
|
||||
|
||||
|
||||
private void beginAnalysis(BinaryData data, BatchContext batchContext) throws InterruptedException {
|
||||
private void beginAnalysis(BinaryData data, BatchContext batchContext, Set<AzureOcrFeature> features) throws InterruptedException {
|
||||
|
||||
batchContext.supervisor.enterConcurrency(batchContext.batch);
|
||||
|
||||
batchContext.supervisor.logUploadStart(batchContext.batch, data.getLength());
|
||||
|
||||
azureOcrResource.callAzureAsync(data)
|
||||
var mdcContext = MDC.getCopyOfContextMap();
|
||||
|
||||
azureOcrResource.callAzureAsync(data, features)
|
||||
.flatMap(response -> {
|
||||
MDC.setContextMap(mdcContext);
|
||||
if (response.getStatus().equals(LongRunningOperationStatus.IN_PROGRESS)) {
|
||||
batchContext.supervisor.logInProgress(batchContext.batch);
|
||||
}
|
||||
@ -128,6 +94,7 @@ public class AsyncOcrService {
|
||||
|
||||
private static void handleCompleted(BatchContext batchContext) {
|
||||
|
||||
log.info("Completed : {}", batchContext.batch);
|
||||
batchContext.supervisor.leaveConcurrency(batchContext.batch);
|
||||
}
|
||||
|
||||
@ -141,32 +108,17 @@ public class AsyncOcrService {
|
||||
private void handleSuccessful(AnalyzeResult finalResult, BatchContext batchContext) {
|
||||
|
||||
try {
|
||||
batchContext.layerFactory.addAnalyzeResult(batchContext.batch, finalResult);
|
||||
batchContext.supervisor.logPageSuccess(batchContext.batch);
|
||||
batchContext.supervisor.logPageSuccess(batchContext.batch());
|
||||
if (settings.isDebug()) {
|
||||
mapper.writeValue(batchContext.batch().getImagePipelineDir().resolve("azure_result_%d.json".formatted(batchContext.batch().getIndex())).toFile(), finalResult);
|
||||
}
|
||||
batchContext.layerFactory.processAnalyzeResult(batchContext.batch(), finalResult);
|
||||
} catch (Exception e) {
|
||||
handleError(e, batchContext);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, PageBatch pageBatch) throws PDFNetException {
|
||||
|
||||
if (pageBatch.size() < 0) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
PDFDoc singlePagePdfDoc = new PDFDoc();
|
||||
pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, singlePagePdfDoc));
|
||||
return singlePagePdfDoc;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc singlePagePdfDoc) {
|
||||
|
||||
singlePagePdfDoc.pagePushBack(pdfDoc.getPage(pageNumber));
|
||||
}
|
||||
|
||||
|
||||
private record BatchContext(LayerFactory layerFactory, OcrExecutionSupervisor supervisor, PageBatch batch) {
|
||||
|
||||
BatchStats batchStats() {
|
||||
|
||||
@ -2,9 +2,11 @@ package com.knecon.fforesight.service.ocr.processor.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.yaml.snakeyaml.events.Event;
|
||||
|
||||
import com.azure.ai.documentintelligence.DocumentIntelligenceAsyncClient;
|
||||
import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
|
||||
@ -19,6 +21,7 @@ import com.azure.core.util.BinaryData;
|
||||
import com.azure.core.util.polling.PollerFlux;
|
||||
import com.google.common.base.Objects;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.SneakyThrows;
|
||||
@ -42,11 +45,18 @@ public class AzureOcrResource {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data) {
|
||||
public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) {
|
||||
|
||||
AnalyzeDocumentRequest analyzeRequest = new AnalyzeDocumentRequest().setBase64Source(data.toBytes());
|
||||
|
||||
return asyncClient.beginAnalyzeDocument(getModelId(), null, null, StringIndexType.UTF16CODE_UNIT, buildFeatures(), null, buildContentFormat(), analyzeRequest);
|
||||
return asyncClient.beginAnalyzeDocument(getModelId(features),
|
||||
null,
|
||||
null,
|
||||
StringIndexType.UTF16CODE_UNIT,
|
||||
buildFeatures(features),
|
||||
null,
|
||||
buildContentFormat(),
|
||||
analyzeRequest);
|
||||
|
||||
}
|
||||
|
||||
@ -60,25 +70,25 @@ public class AzureOcrResource {
|
||||
}
|
||||
|
||||
|
||||
private String getModelId() {
|
||||
private String getModelId(Set<AzureOcrFeature> features) {
|
||||
|
||||
if (settings.isIdpEnabled()) {
|
||||
if (features.contains(AzureOcrFeature.IDP)) {
|
||||
return "prebuilt-layout";
|
||||
}
|
||||
return "prebuilt-read";
|
||||
}
|
||||
|
||||
|
||||
private List<DocumentAnalysisFeature> buildFeatures() {
|
||||
private List<DocumentAnalysisFeature> buildFeatures(Set<AzureOcrFeature> features) {
|
||||
|
||||
var features = new ArrayList<DocumentAnalysisFeature>();
|
||||
var azureFeatures = new ArrayList<DocumentAnalysisFeature>();
|
||||
|
||||
if (settings.isIdpEnabled()) {
|
||||
features.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
|
||||
if (features.contains(AzureOcrFeature.IDP)) {
|
||||
azureFeatures.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
|
||||
}
|
||||
features.add(DocumentAnalysisFeature.BARCODES);
|
||||
azureFeatures.add(DocumentAnalysisFeature.BARCODES);
|
||||
|
||||
return features;
|
||||
return azureFeatures;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,107 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.service;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.Optimizer;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
 * Splits a PDF into batches of at most {@code settings.getBatchSize()} pages and writes every
 * batch as its own PDF file ("batch_&lt;number&gt;.pdf") into a given directory.
 */
@Service
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class BatchFactory {
|
||||
|
||||
OcrServiceSettings settings;
|
||||
|
||||
|
||||
/**
 * Returns the file name used for the batch document with the given number.
 *
 * @param number the batch number
 * @return "batch_" + number + ".pdf"
 */
public static String formatBatchFilename(int number) {
|
||||
|
||||
return "batch_%d.pdf".formatted(number);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Determines the pages to process via ImageDetectionService.findPagesToProcess, reports the result
 * to the supervisor and builds the batches for those pages.
 * Checked exceptions are rethrown unchecked (Lombok {@code @SneakyThrows}).
 *
 * @param pdfDoc     the source document
 * @param supervisor receives the page count and the number of pages found, as well as skipped pages
 * @param features   the requested features; passed on to page detection and batch building
 * @param fileDir    the directory the batch PDF files are written to
 * @return the batches in page order
 */
@SneakyThrows
|
||||
public List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, Path fileDir) {
|
||||
|
||||
Set<Integer> pagesWithImages = ImageDetectionService.findPagesToProcess(pdfDoc, features);
|
||||
supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesWithImages.size());
|
||||
|
||||
return buildBatches(pdfDoc, supervisor, features, fileDir, pagesWithImages);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Walks over all pages (1-based) and groups the pages to process into batches of
 * {@code settings.getBatchSize()} pages; the last batch may be smaller.
 * A page is skipped (and logged as skipped) when ALL_PAGES is not requested and the page is not
 * contained in {@code pagesWithImages}.
 *
 * @param pdfDoc          the source document
 * @param supervisor      notified about every skipped page
 * @param features        the requested features; only ALL_PAGES is evaluated here
 * @param fileDir         the directory the batch PDF files are written to
 * @param pagesWithImages the page numbers selected for processing
 * @return the created batches, numbered from 0 in creation order; empty if no page qualifies
 * @throws PDFNetException declared for the PDFTron calls (e.g. getPageCount)
 */
public List<PageBatch> buildBatches(PDFDoc pdfDoc,
|
||||
OcrExecutionSupervisor supervisor,
|
||||
Set<AzureOcrFeature> features,
|
||||
Path fileDir,
|
||||
Set<Integer> pagesWithImages) throws PDFNetException {
|
||||
|
||||
List<PageBatch> batches = new ArrayList<>();
|
||||
List<Integer> numbersForCurrentBatch = new ArrayList<>();
|
||||
for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) {
|
||||
if (!features.contains(AzureOcrFeature.ALL_PAGES) && !pagesWithImages.contains(pageNumber)) {
|
||||
supervisor.logPageSkipped(pageNumber);
|
||||
continue;
|
||||
}
|
||||
numbersForCurrentBatch.add(pageNumber);
|
||||
if (numbersForCurrentBatch.size() == settings.getBatchSize()) {
|
||||
// batch is full: write it out; the current size of the result list serves as batch number
batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, fileDir));
|
||||
// a fresh list is started because the previous one is handed over to the created PageBatch
numbersForCurrentBatch = new ArrayList<>();
|
||||
}
|
||||
}
|
||||
// flush the trailing, not completely filled batch
if (!numbersForCurrentBatch.isEmpty()) {
|
||||
batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, fileDir));
|
||||
}
|
||||
return batches;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a single batch: copies the given pages into a new document, optimizes it, saves it
 * linearized as {@code fileDir/batch_<number>.pdf} and returns the PageBatch describing it.
 * Checked exceptions are rethrown unchecked (Lombok {@code @SneakyThrows}).
 *
 * @param number      the batch number; used for the file name and as PageBatch index
 * @param pdfDoc      the source document
 * @param pageNumbers the page numbers of the source document that belong to this batch; must not be empty
 * @param fileDir     the directory the batch file is written to; also passed to the PageBatch as its
 *                    last constructor argument
 * @return the PageBatch referencing the written batch file
 * @throws IllegalArgumentException if {@code pageNumbers} is empty
 */
@SneakyThrows
|
||||
public static PageBatch create(int number, PDFDoc pdfDoc, List<Integer> pageNumbers, Path fileDir) {
|
||||
|
||||
if (pageNumbers.isEmpty()) {
|
||||
throw new IllegalArgumentException("pageNumbers must not be empty");
|
||||
}
|
||||
|
||||
Path batchDocPath = fileDir.resolve(formatBatchFilename(number));
|
||||
// try-with-resources closes the temporary batch document after it has been saved
try (var batchDoc = extractBatchDocument(pdfDoc, pageNumbers)) {
|
||||
Optimizer.optimize(batchDoc);
|
||||
batchDoc.save(batchDocPath.toFile().toString(), SDFDoc.SaveMode.LINEARIZED, null);
|
||||
}
|
||||
return new PageBatch(number, pageNumbers, batchDocPath, fileDir);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a new PDFDoc containing the given pages of {@code pdfDoc} in list order.
 * The caller is responsible for closing the returned document.
 *
 * @param pdfDoc    the source document
 * @param pageBatch the page numbers to copy; must not be empty
 * @return the new document
 * @throws IllegalArgumentException if {@code pageBatch} is empty
 * @throws PDFNetException declared for the PDFTron calls
 */
private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, List<Integer> pageBatch) throws PDFNetException {
|
||||
|
||||
if (pageBatch.isEmpty()) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
PDFDoc batchDoc = new PDFDoc();
|
||||
// NOTE(review): if adding a page throws, batchDoc is not closed here — confirm whether that matters.
pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, batchDoc));
|
||||
return batchDoc;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Appends the page with the given number of {@code pdfDoc} to the end of {@code batchDoc}.
 * {@code @SneakyThrows} lets this method be used inside a lambda despite the checked PDFTron exceptions.
 */
@SneakyThrows
|
||||
private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc batchDoc) {
|
||||
|
||||
batchDoc.pagePushBack(pdfDoc.getPage(pageNumber));
|
||||
}
|
||||
|
||||
}
|
||||
@ -7,28 +7,26 @@ import java.util.Set;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.Element;
|
||||
import com.pdftron.pdf.ElementReader;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@Service
|
||||
@UtilityClass
|
||||
public class ImageDetectionService {
|
||||
|
||||
// any image with smaller height and width than this gets thrown out, see everyPointInDashedLineIsImage.pdf
|
||||
private static final int PIXEL_THRESHOLD = 0;
|
||||
private final OcrServiceSettings ocrServiceSettings;
|
||||
|
||||
|
||||
public ImageDetectionService(OcrServiceSettings ocrServiceSettings) {this.ocrServiceSettings = ocrServiceSettings;}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public Set<Integer> findPagesToProcess(PDFDoc pdfDoc) {
|
||||
public Set<Integer> findPagesToProcess(PDFDoc pdfDoc, Set<AzureOcrFeature> features) {
|
||||
|
||||
if (ocrServiceSettings.isProcessAllPages()) {
|
||||
if (features.contains(AzureOcrFeature.ALL_PAGES)) {
|
||||
Set<Integer> pages = new HashSet<>();
|
||||
for (int i = 1; i <= pdfDoc.getPageCount(); i++) {
|
||||
pages.add(i);
|
||||
|
||||
@ -8,6 +8,7 @@ import java.io.FileOutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
@ -16,10 +17,11 @@ import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
|
||||
import com.iqser.red.pdftronlogic.commons.OCGWatermarkRemovalService;
|
||||
import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.Statistics;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
|
||||
@ -36,14 +38,15 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class OCRService {
|
||||
|
||||
public static final String IMAGE_PIPELINE_DIR = "image_pipeline";
|
||||
public static final String AZURE_OUTPUT_DIR = "azure_output";
|
||||
IOcrMessageSender ocrMessageSender;
|
||||
WatermarkRemovalService watermarkRemovalService;
|
||||
InvisibleElementRemovalService invisibleElementRemovalService;
|
||||
PDFTronViewerDocumentService viewerDocumentService;
|
||||
ImageDetectionService imageDetectionService;
|
||||
BatchFactory batchFactory;
|
||||
AsyncOcrService asyncOcrService;
|
||||
OcrServiceSettings settings;
|
||||
ImageProcessingPipeline imageProcessingPipeline;
|
||||
|
||||
|
||||
/**
|
||||
@ -59,21 +62,21 @@ public class OCRService {
|
||||
* @param analyzeResultFile result file with additional information
|
||||
*/
|
||||
@Observed(name = "OCRService", contextualName = "run-ocr-on-document")
|
||||
public void runOcrOnDocument(String dossierId, String fileId, boolean removeWatermark, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
|
||||
public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
|
||||
|
||||
if (removeWatermark) {
|
||||
if (features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) {
|
||||
removeWatermark(documentFile);
|
||||
}
|
||||
|
||||
removeInvisibleElements(documentFile);
|
||||
|
||||
log.info("Starting OCR for file {}", fileId);
|
||||
log.info("Starting OCR");
|
||||
long ocrStart = System.currentTimeMillis();
|
||||
|
||||
Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile).getStatistics();
|
||||
Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile, features).getStatistics();
|
||||
|
||||
long ocrEnd = System.currentTimeMillis();
|
||||
log.info("ocr successful for file with dossierId {} and fileId {}, took {}", dossierId, fileId, humanizeDuration(ocrEnd - ocrStart));
|
||||
log.info("OCR successful, took {}", humanizeDuration(ocrEnd - ocrStart));
|
||||
|
||||
if (settings.isDebug()) {
|
||||
logRuntimeBreakdown(ocrEnd, ocrStart, stats);
|
||||
@ -117,10 +120,16 @@ public class OCRService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public OcrExecutionSupervisor runOcr(Path tmpDir, File documentFile, File viewerDocumentFile, String fileId, String dossierId, File analyzeResultFile) {
|
||||
public OcrExecutionSupervisor runOcr(Path tmpDir,
|
||||
File documentFile,
|
||||
File viewerDocumentFile,
|
||||
String fileId,
|
||||
String dossierId,
|
||||
File analyzeResultFile,
|
||||
Set<AzureOcrFeature> features) {
|
||||
|
||||
Path tmpImageDir = tmpDir.resolve("images");
|
||||
Path azureOutputDir = tmpDir.resolve("azure_output");
|
||||
Path tmpImageDir = tmpDir.resolve(IMAGE_PIPELINE_DIR);
|
||||
Path azureOutputDir = tmpDir.resolve(AZURE_OUTPUT_DIR);
|
||||
|
||||
Files.createDirectories(azureOutputDir);
|
||||
Files.createDirectories(tmpImageDir);
|
||||
@ -132,19 +141,18 @@ public class OCRService {
|
||||
OcrExecutionSupervisor supervisor = new OcrExecutionSupervisor(pdfDoc.getPageCount(), ocrMessageSender, fileId, settings);
|
||||
supervisor.getStatistics().setStart();
|
||||
|
||||
Set<Integer> pagesWithImages = imageDetectionService.findPagesToProcess(pdfDoc);
|
||||
ImageProcessingSupervisor imageSupervisor = null;
|
||||
if (settings.isFontStyleDetection()) {
|
||||
imageSupervisor = imageProcessingPipeline.run(pagesWithImages, tmpImageDir, documentFile);
|
||||
}
|
||||
List<PageBatch> batches = batchFactory.splitIntoBatches(pdfDoc, supervisor, features, tmpImageDir);
|
||||
|
||||
supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesWithImages.size());
|
||||
|
||||
OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, pagesWithImages, imageSupervisor);
|
||||
OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, features, batches);
|
||||
|
||||
viewerDocumentService.addLayerGroups(documentFile, documentFile, ocrResult.regularLayers());
|
||||
viewerDocumentService.addLayerGroups(documentFile, viewerDocumentFile, ocrResult.debugLayers());
|
||||
|
||||
if (features.contains(AzureOcrFeature.ROTATION_CORRECTION)) {
|
||||
RotationCorrectionUtility.rotatePages(documentFile.toPath(), documentFile.toPath(), ocrResult.anglesPerPage());
|
||||
RotationCorrectionUtility.rotatePages(viewerDocumentFile.toPath(), viewerDocumentFile.toPath(), ocrResult.anglesPerPage());
|
||||
}
|
||||
|
||||
supervisor.getStatistics().drawingPdfFinished();
|
||||
|
||||
supervisor.sendFinished();
|
||||
|
||||
@ -92,10 +92,10 @@ public class OcrExecutionSupervisor {
|
||||
}
|
||||
|
||||
|
||||
public void finishMappingResult(PageBatch pageRange) {
|
||||
public void finishMappingResult(PageBatch batch) {
|
||||
|
||||
pageRange.forEach(pageIndex -> countDownPagesToProcess.countDown());
|
||||
statistics.getBatchStats(pageRange).finishWritingText();
|
||||
batch.forEach(pageIndex -> countDownPagesToProcess.countDown());
|
||||
statistics.getBatchStats(batch).finishWritingText();
|
||||
ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount());
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,179 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.nio.IntBuffer;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.DocumentPage;
|
||||
import com.azure.ai.documentintelligence.models.DocumentWord;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
|
||||
import com.sun.jna.Pointer;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sourceforge.lept4j.Leptonica1;
|
||||
import net.sourceforge.lept4j.Numa;
|
||||
import net.sourceforge.lept4j.Pix;
|
||||
import net.sourceforge.lept4j.util.LeptUtils;
|
||||
|
||||
/**
|
||||
* This class attempts to shrink the BBox of a word to match the exact height of the word. This is only attempted for horizontal or vertical words. Any askew text is left as is.
|
||||
*/
|
||||
@Slf4j
|
||||
public class BBoxSnuggificationService {
|
||||
|
||||
public static final int PIXEL_COUNT_THRESHOLD = 2; // minimum active pixel count per row for shrinking to stop
|
||||
private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // Skips snuggification, if the average remaining word rotation of a word, written from left-to-right is bigger than this
|
||||
public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for word, if the remaining rotation is larger than this angle
|
||||
|
||||
/**
 * Selects which shrinking routine snuggify applies to a word, as decided by determineOperation.
 */
private enum Operation {
|
||||
// snuggify calls snuggifyY for this value, i.e. the box is trimmed along the Y axis (top/bottom)
HORIZONTAL,
|
||||
// snuggify calls snuggifyX for this value, i.e. the box is trimmed along the X axis (left/right)
VERTICAL,
|
||||
// no trimming is attempted; snuggify returns Optional.empty()
NONE
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static Optional<QuadPoint> snuggify(Pix pageImage, DocumentWord origin, AffineTransform resultToImageTransform) {
|
||||
|
||||
if (pageImage == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
QuadPoint originTransformed = QuadPoint.fromPolygons(origin.getPolygon()).getTransformed(resultToImageTransform);
|
||||
double remainingAngle = Math.abs(RotationCorrectionUtility.getRemainingAngle(originTransformed.getAngle()));
|
||||
QuadPoint.Direction direction = originTransformed.getDirection();
|
||||
|
||||
Operation operation = determineOperation(origin, direction, remainingAngle, originTransformed);
|
||||
|
||||
if (operation == Operation.NONE) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
Pix wordImage = WritableOcrResultFactory.extractWordImage(originTransformed, pageImage);
|
||||
|
||||
if (wordImage == null) {
|
||||
log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
Optional<Rectangle2D> snugBox = switch (operation) {
|
||||
case HORIZONTAL -> snuggifyY(wordImage, originTransformed.getBounds2D());
|
||||
case VERTICAL -> snuggifyX(wordImage, originTransformed.getBounds2D());
|
||||
default -> Optional.empty();
|
||||
};
|
||||
LeptUtils.disposePix(wordImage);
|
||||
|
||||
AffineTransform imageToResultTransform = resultToImageTransform.createInverse();
|
||||
return snugBox.map(snugBBox -> QuadPoint.fromRectangle2D(snugBBox, direction))
|
||||
.map(bbox -> bbox.getTransformed(imageToResultTransform));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static Operation determineOperation(DocumentWord origin, QuadPoint.Direction direction, double remainingAngle, QuadPoint originTransformed) {
|
||||
|
||||
Operation operation = Operation.NONE;
|
||||
if (((direction.equals(QuadPoint.Direction.RIGHT) || direction.equals(QuadPoint.Direction.LEFT)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD) //
|
||||
|| (origin.getContent().length() < 4 || Math.abs(originTransformed.getAngle()) < AVERAGE_ANGLE_THRESHOLD * 3)) {
|
||||
operation = Operation.HORIZONTAL;
|
||||
} else if ((direction.equals(QuadPoint.Direction.UP) || direction.equals(QuadPoint.Direction.DOWN)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD) {
|
||||
operation = Operation.VERTICAL;
|
||||
}
|
||||
return operation;
|
||||
}
|
||||
|
||||
|
||||
private static Optional<Rectangle2D> snuggifyX(Pix wordImage, Rectangle2D origin) {
|
||||
|
||||
Numa colCounts = Leptonica1.pixCountPixelsByColumn(wordImage);
|
||||
int start = 0;
|
||||
int end = wordImage.w - PIXEL_COUNT_THRESHOLD;
|
||||
for (int i = start; i < Math.min(wordImage.w, 25); i++) {
|
||||
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
|
||||
start = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = end; i > Math.max(0, wordImage.w - 25); i--) {
|
||||
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
|
||||
end = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (start == 0 && end == wordImage.w) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (wordImage.w - end), origin.getHeight()));
|
||||
}
|
||||
|
||||
|
||||
private static Optional<Rectangle2D> snuggifyY(Pix wordImage, Rectangle2D origin) {
|
||||
|
||||
int start = 0;
|
||||
int end = wordImage.h - 1;
|
||||
for (int i = start; i < Math.min(wordImage.h, 25); i++) {
|
||||
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
|
||||
start = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = end; i > Math.max(0, wordImage.h - 25); i--) {
|
||||
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
|
||||
end = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (start == 0 && end == wordImage.h) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (wordImage.h - end)));
|
||||
}
|
||||
|
||||
|
||||
private static int pixCountPerRow(int row, Pix pix) {
|
||||
|
||||
IntBuffer result = IntBuffer.allocate(1);
|
||||
int success = Leptonica1.pixCountPixelsInRow(pix, row, result, null);
|
||||
if (success == 0) {
|
||||
return result.get();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static int pixCountPerColumn(int column, Numa colCounts) {
|
||||
|
||||
if (column > colCounts.n) {
|
||||
throw new IndexOutOfBoundsException("column " + column + " is out of bounds for column count " + colCounts.n);
|
||||
}
|
||||
Pointer pointer = colCounts.array.getPointer();
|
||||
|
||||
// Read the float value at position i. Each float takes 4 bytes.
|
||||
return (int) pointer.getFloat((long) column * Float.BYTES);
|
||||
}
|
||||
|
||||
|
||||
public static boolean canBeSnuggified(DocumentPage resultPage, AffineTransform imageTransform) {
|
||||
|
||||
double averageAngle = resultPage.getWords()
|
||||
.stream()
|
||||
.filter(word -> word.getContent().length() >= 4)
|
||||
.map(DocumentWord::getPolygon)
|
||||
.map(QuadPoint::fromPolygons)
|
||||
.map(qp -> qp.getTransformed(imageTransform))
|
||||
.filter(qp -> qp.getDirection().equals(QuadPoint.Direction.RIGHT))
|
||||
.mapToDouble(QuadPoint::getAngle)
|
||||
.map(Math::toDegrees)
|
||||
.map(RotationCorrectionUtility::getRemainingAngle).average()
|
||||
.orElse(Double.MAX_VALUE);
|
||||
|
||||
return Math.abs(averageAngle) < AVERAGE_ANGLE_THRESHOLD;
|
||||
}
|
||||
|
||||
}
|
||||
@ -84,6 +84,7 @@ public class FontStyleDetector implements Closeable {
|
||||
wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.BOLD_INSTANCE);
|
||||
wordImage.textPosition().setFontStyle(FontStyle.BOLD);
|
||||
} else {
|
||||
wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.REGULAR_INSTANCE);
|
||||
wordImage.textPosition().setFontStyle(FontStyle.REGULAR);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,16 +1,20 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
|
||||
|
||||
import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.slf4j.MDC;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -27,7 +31,7 @@ public class GhostScriptOutputHandler extends Thread {
|
||||
|
||||
// If the stdError or stdOut buffer of a thread is not being emptied it might lock the process in case of errors, so we need to empty both streams to prevent a deadlock.
|
||||
// Since both need to read simultaneously we need to implement the readers as separate threads.
|
||||
|
||||
final int batchIdx;
|
||||
final InputStream is;
|
||||
final String processName;
|
||||
final Type type;
|
||||
@ -36,24 +40,32 @@ public class GhostScriptOutputHandler extends Thread {
|
||||
final Consumer<ImageFile> outputHandler;
|
||||
final Consumer<String> errorHandler;
|
||||
|
||||
final Map<String, String> parentMdcContext;
|
||||
|
||||
int currentPageNumber;
|
||||
|
||||
|
||||
public static GhostScriptOutputHandler stdError(InputStream is, Consumer<String> errorHandler) {
|
||||
public static GhostScriptOutputHandler stdError(int batchIdx, InputStream is, Consumer<String> errorHandler) {
|
||||
|
||||
return new GhostScriptOutputHandler(is, "GS", Type.ERROR, null, null, errorHandler);
|
||||
return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.ERROR, null, null, errorHandler, MDC.getCopyOfContextMap());
|
||||
}
|
||||
|
||||
|
||||
public static GhostScriptOutputHandler stdOut(InputStream is, Map<Integer, ImageFile> pagesToProcess, Consumer<ImageFile> imageFileOutput, Consumer<String> errorHandler) {
|
||||
public static GhostScriptOutputHandler stdOut(int batchIdx,
|
||||
InputStream is,
|
||||
Map<Integer, ImageFile> pagesToProcess,
|
||||
Consumer<ImageFile> imageFileOutput,
|
||||
Consumer<String> errorHandler) {
|
||||
|
||||
return new GhostScriptOutputHandler(is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler);
|
||||
return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler, MDC.getCopyOfContextMap());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void run() {
|
||||
|
||||
MDC.setContextMap(parentMdcContext);
|
||||
|
||||
try (InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr)) {
|
||||
|
||||
String line;
|
||||
@ -77,7 +89,9 @@ public class GhostScriptOutputHandler extends Thread {
|
||||
queueFinishedPage(currentPageNumber);
|
||||
|
||||
if (!pagesToProcess.isEmpty()) {
|
||||
errorHandler.accept(String.format("Ghostscript finished for batch, but pages %s remain unprocessed.", formatPagesToProcess()));
|
||||
errorHandler.accept(String.format("Ghostscript finished for batch %d, but pages %s remain unprocessed.", batchIdx, formatPagesToProcess()));
|
||||
} else {
|
||||
log.info("{}: Batch rendered successfully!", batchIdx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,10 +100,16 @@ public class GhostScriptOutputHandler extends Thread {
|
||||
|
||||
private String formatPagesToProcess() {
|
||||
|
||||
var pages = new PageBatch();
|
||||
pagesToProcess.keySet()
|
||||
.forEach(pages::add);
|
||||
return pages.toString();
|
||||
List<String> intervals = formatIntervals(pagesToProcess.keySet()
|
||||
.stream()
|
||||
.sorted()
|
||||
.toList());
|
||||
if (intervals.size() > 4) {
|
||||
intervals = intervals.subList(0, 4);
|
||||
intervals.add("...");
|
||||
}
|
||||
|
||||
return String.join(", ", intervals);
|
||||
}
|
||||
|
||||
|
||||
@ -106,7 +126,6 @@ public class GhostScriptOutputHandler extends Thread {
|
||||
currentPageNumber = pageNumber;
|
||||
return;
|
||||
}
|
||||
|
||||
queueFinishedPage(currentPageNumber);
|
||||
currentPageNumber = pageNumber;
|
||||
}
|
||||
@ -117,10 +136,10 @@ public class GhostScriptOutputHandler extends Thread {
|
||||
|
||||
var imageFile = this.pagesToProcess.remove(pageNumber);
|
||||
if (imageFile == null) {
|
||||
errorHandler.accept(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet()));
|
||||
errorHandler.accept(String.format("%d: Page number %d does not exist in this thread. It only has pagenumbers %s", batchIdx, pageNumber, pagesToProcess.keySet()));
|
||||
} else {
|
||||
if (!new File(imageFile.absoluteFilePath()).exists()) {
|
||||
errorHandler.accept(String.format("Rendered page with number %d does not exist!", pageNumber));
|
||||
errorHandler.accept(String.format("%d: Rendered page with number %d does not exist!", batchIdx, pageNumber));
|
||||
}
|
||||
}
|
||||
outputHandler.accept(imageFile);
|
||||
|
||||
@ -2,18 +2,15 @@ package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
|
||||
import com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -25,132 +22,60 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 142/144
|
||||
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 72/74
|
||||
public class GhostScriptService {
|
||||
|
||||
public static final int BATCH_SIZE = 256;
|
||||
static String FORMAT = ".tiff";
|
||||
static String DEVICE = "tiffgray";
|
||||
static int DPI = 300;
|
||||
static int PROCESS_COUNT = 1;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void renderPagesBatched(List<Integer> pagesToProcess,
|
||||
String documentAbsolutePath,
|
||||
Path tmpImageDir,
|
||||
ImageProcessingSupervisor supervisor,
|
||||
Consumer<ImageFile> successHandler,
|
||||
Consumer<String> errorHandler) {
|
||||
public void startBatchRender(PageBatch batch, ImageProcessingSupervisor supervisor, Path renderedImageDir, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
|
||||
|
||||
List<List<ProcessInfo>> processInfoBatches = buildSubListForEachProcess(pagesToProcess,
|
||||
PROCESS_COUNT,
|
||||
BATCH_SIZE
|
||||
* PROCESS_COUNT); // GS has a limit on how many pageIndices per call are possible, so we limit it to 256 pages per process
|
||||
for (int batchIdx = 0; batchIdx < processInfoBatches.size(); batchIdx++) {
|
||||
supervisor.requireNoErrors();
|
||||
|
||||
supervisor.requireNoErrors();
|
||||
|
||||
List<ProcessInfo> processInfos = processInfoBatches.get(batchIdx);
|
||||
|
||||
log.info("Batch {}: Running {} gs processes with ({}) pages each",
|
||||
batchIdx,
|
||||
processInfos.size(),
|
||||
processInfos.stream()
|
||||
.map(info -> info.pageNumbers().size())
|
||||
.map(String::valueOf)
|
||||
.collect(Collectors.joining(", ")));
|
||||
|
||||
int finalBatchIdx = batchIdx;
|
||||
List<Process> processes = processInfos.stream()
|
||||
.parallel()
|
||||
.map(info -> buildCmdArgs(info.processIdx(), finalBatchIdx, info.pageNumbers(), tmpImageDir, documentAbsolutePath))
|
||||
.peek(s -> log.debug(String.join(" ", s.cmdArgs())))
|
||||
.map(processInfo -> executeProcess(processInfo, successHandler, errorHandler))
|
||||
.toList();
|
||||
|
||||
List<Integer> processExitCodes = new LinkedList<>();
|
||||
for (Process process : processes) {
|
||||
processExitCodes.add(process.waitFor());
|
||||
}
|
||||
log.info("Batch {}: Ghostscript processes finished with exit codes {}", batchIdx, processExitCodes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private List<List<ProcessInfo>> buildSubListForEachProcess(List<Integer> stitchedPageNumbers, int processCount, int batchSize) {
|
||||
|
||||
// GhostScript command line can only handle so many page numbers at once, so we split it into batches
|
||||
int batchCount = (int) Math.ceil((double) stitchedPageNumbers.size() / batchSize);
|
||||
|
||||
log.info("Splitting {} page renderings across {} process(es) in {} batch(es) with size {}", stitchedPageNumbers.size(), processCount, batchCount, batchSize);
|
||||
|
||||
List<List<ProcessInfo>> processInfoBatches = new ArrayList<>(batchCount);
|
||||
List<List<List<Integer>>> batchedBalancedSublist = ListSplittingUtils.buildBatchedBalancedSublist(stitchedPageNumbers.stream()
|
||||
.sorted()
|
||||
.toList(), processCount, batchCount);
|
||||
|
||||
for (var batch : batchedBalancedSublist) {
|
||||
List<ProcessInfo> processInfos = new ArrayList<>(processCount);
|
||||
for (int threadIdx = 0; threadIdx < batch.size(); threadIdx++) {
|
||||
List<Integer> balancedPageNumbersSubList = batch.get(threadIdx);
|
||||
processInfos.add(new ProcessInfo(threadIdx, balancedPageNumbersSubList));
|
||||
}
|
||||
processInfoBatches.add(processInfos);
|
||||
}
|
||||
return processInfoBatches;
|
||||
log.info("Batch {}: starting GhostScript rendering with {} pages", batch, batch.size());
|
||||
executeProcess(batch.getIndex(), buildCmdArgs(batch, renderedImageDir, batch.getBatchDoc()), successHandler, errorHandler);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(Integer processIdx,
|
||||
Integer batchIdx,
|
||||
List<Integer> stitchedImagePageIndices,
|
||||
Path outputDir,
|
||||
String documentAbsolutePath) {
|
||||
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(PageBatch batch, Path outputDir, Path document) {
|
||||
|
||||
String imagePathFormat = outputDir.resolve("output_" + processIdx + "_" + batchIdx + ".%04d" + FORMAT).toFile().toString();
|
||||
String imagePathFormat = outputDir.resolve("output_" + batch.getIndex() + ".%04d" + FORMAT).toFile().toString();
|
||||
|
||||
Map<Integer, ImageFile> fullPageImages = new HashMap<>();
|
||||
for (int i = 0; i < stitchedImagePageIndices.size(); i++) {
|
||||
Integer pageNumber = stitchedImagePageIndices.get(i);
|
||||
fullPageImages.put(pageNumber, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
|
||||
List<Integer> allPageNumbers = batch.getAllPageNumbers();
|
||||
|
||||
for (int i = 0; i < allPageNumbers.size(); i++) {
|
||||
Integer pageNumber = allPageNumbers.get(i);
|
||||
fullPageImages.put(i + 1, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
|
||||
}
|
||||
|
||||
String[] cmdArgs = buildCmdArgs(stitchedImagePageIndices, documentAbsolutePath, imagePathFormat);
|
||||
String[] cmdArgs = buildCmdArgs(document, imagePathFormat);
|
||||
|
||||
return new ProcessCmdsAndRenderedImageFiles(cmdArgs, fullPageImages);
|
||||
}
|
||||
|
||||
|
||||
private String[] buildCmdArgs(List<Integer> pageNumbers, String documentAbsolutePath, String imagePathFormat) {
|
||||
private String[] buildCmdArgs(Path document, String imagePathFormat) {
|
||||
|
||||
StringBuilder sPageList = new StringBuilder();
|
||||
int i = 1;
|
||||
for (Integer integer : pageNumbers) {
|
||||
sPageList.append(integer);
|
||||
if (i < pageNumbers.size()) {
|
||||
sPageList.append(",");
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sPageList=" + sPageList, "-sOutputFile=" + imagePathFormat, documentAbsolutePath, "-c", "quit"};
|
||||
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sOutputFile=" + imagePathFormat, document.toFile().toString(), "-c", "quit"};
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private Process executeProcess(ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
|
||||
private void executeProcess(int batchIdx, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
|
||||
|
||||
Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
|
||||
InputStream stdOut = p.getInputStream();
|
||||
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
|
||||
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batchIdx, stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
|
||||
InputStream stdError = p.getErrorStream();
|
||||
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(stdError, errorHandler);
|
||||
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batchIdx, stdError, errorHandler);
|
||||
|
||||
stdOutLogger.start();
|
||||
stdErrorLogger.start();
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
@ -158,8 +83,4 @@ public class GhostScriptService {
|
||||
|
||||
}
|
||||
|
||||
private record ProcessInfo(Integer processIdx, List<Integer> pageNumbers) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,15 +1,14 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -21,29 +20,30 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ImageProcessingPipeline {
|
||||
|
||||
public static final String PROCESSED_DIR = "processed";
|
||||
public static final String RENDERED_DIR = "rendered";
|
||||
|
||||
GhostScriptService ghostScriptService;
|
||||
ImageProcessingService imageProcessingService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public ImageProcessingSupervisor run(Set<Integer> pageNumberSet, Path imageDir, File document) {
|
||||
public ImageProcessingSupervisor addToPipeline(PageBatch batch) {
|
||||
|
||||
Path processedImageDir = imageDir.resolve("processed");
|
||||
Path renderedImageDir = imageDir.resolve("rendered");
|
||||
Path processedImageDir = batch.getImagePipelineDir().resolve(PROCESSED_DIR);
|
||||
Path renderedImageDir = batch.getImagePipelineDir().resolve(RENDERED_DIR);
|
||||
|
||||
Files.createDirectories(renderedImageDir);
|
||||
Files.createDirectories(processedImageDir);
|
||||
|
||||
List<Integer> pageNumbers = pageNumberSet.stream()
|
||||
.sorted()
|
||||
.toList();
|
||||
List<Integer> pageNumbers = batch.getAllPageNumbers();
|
||||
|
||||
ImageProcessingSupervisor supervisor = new ImageProcessingSupervisor(pageNumbers);
|
||||
|
||||
Consumer<ImageFile> renderingSuccessConsumer = imageFile -> imageProcessingService.addToProcessingQueue(imageFile, processedImageDir, supervisor);
|
||||
Consumer<String> renderingErrorConsumer = supervisor::markError;
|
||||
|
||||
ghostScriptService.renderPagesBatched(pageNumbers, document.toString(), renderedImageDir, supervisor, renderingSuccessConsumer, renderingErrorConsumer);
|
||||
ghostScriptService.startBatchRender(batch, supervisor, renderedImageDir, renderingSuccessConsumer, renderingErrorConsumer);
|
||||
|
||||
return supervisor;
|
||||
}
|
||||
|
||||
@ -38,6 +38,7 @@ public class ImageProcessingService {
|
||||
try {
|
||||
process(processParams.unprocessedImage(), processParams.outputDir, processParams.supervisor());
|
||||
} catch (Exception e) {
|
||||
processParams.supervisor.markPageFinished(processParams.unprocessedImage());
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
@ -54,31 +55,31 @@ public class ImageProcessingService {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void process(ImageFile unprocessedImage, Path outputDir, ImageProcessingSupervisor supervisor) {
|
||||
|
||||
supervisor.requireNoErrors();
|
||||
String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
|
||||
ImageFile imageFile = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
|
||||
try {
|
||||
synchronized (ImageProcessingSupervisor.class) {
|
||||
// Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
|
||||
Pix processedPix;
|
||||
Pix pix = unprocessedImage.readPix();
|
||||
|
||||
synchronized (ImageProcessingSupervisor.class) {
|
||||
// Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
|
||||
Pix processedPix;
|
||||
Pix pix = unprocessedImage.readPix();
|
||||
processedPix = processPix(pix);
|
||||
Leptonica1.pixWrite(absoluteFilePath, processedPix, ILeptonica.IFF_TIFF_PACKBITS);
|
||||
|
||||
String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
|
||||
LeptUtils.disposePix(pix);
|
||||
LeptUtils.disposePix(processedPix);
|
||||
|
||||
processedPix = processPix(pix);
|
||||
Leptonica1.pixWrite(absoluteFilePath, processedPix, ILeptonica.IFF_TIFF_PACKBITS);
|
||||
|
||||
LeptUtils.disposePix(pix);
|
||||
LeptUtils.disposePix(processedPix);
|
||||
|
||||
ImageFile imageFile = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
supervisor.markError(e.getMessage());
|
||||
} finally {
|
||||
supervisor.markPageFinished(imageFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private Pix processPix(Pix pix) {
|
||||
|
||||
Pix binarized;
|
||||
|
||||
@ -63,7 +63,7 @@ public class ImageProcessingSupervisor {
|
||||
|
||||
private boolean hasErrors() {
|
||||
|
||||
return errors.isEmpty();
|
||||
return !errors.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
@ -86,7 +86,7 @@ public class ImageProcessingSupervisor {
|
||||
if (this.errors.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors));
|
||||
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors.subList(0, Math.min(errors.size(), 3))));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,23 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.visualizations;
|
||||
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.DocumentSpan;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
|
||||
|
||||
public class FontStyler {
|
||||
|
||||
|
||||
public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
|
||||
|
||||
public static Lookups empty() {
|
||||
|
||||
return new Lookups(new SpanLookup<>(Stream.empty(), Function.identity()),
|
||||
new SpanLookup<>(Stream.empty(), Function.identity()),
|
||||
new SpanLookup<>(Stream.empty(), Function.identity()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -20,6 +20,7 @@ import lombok.experimental.FieldDefaults;
|
||||
public final class WritableOcrResult {
|
||||
|
||||
int pageNumber;
|
||||
double angle;
|
||||
@Builder.Default
|
||||
List<TextPositionInImage> textPositionInImage = Collections.emptyList();
|
||||
@Builder.Default
|
||||
|
||||
@ -11,6 +11,8 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@ -25,54 +27,67 @@ import com.azure.ai.documentintelligence.models.DocumentTableCell;
|
||||
import com.azure.ai.documentintelligence.models.DocumentWord;
|
||||
import com.azure.ai.documentintelligence.models.FontWeight;
|
||||
import com.google.common.base.Functions;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.Type0FontMetricsProvider;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Getter;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sourceforge.lept4j.Box;
|
||||
import net.sourceforge.lept4j.Leptonica1;
|
||||
import net.sourceforge.lept4j.Pix;
|
||||
import net.sourceforge.lept4j.util.LeptUtils;
|
||||
|
||||
@Slf4j
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class WritableOcrResultFactory {
|
||||
|
||||
FontMetricsProvider regularFont = Type0FontMetricsProvider.REGULAR_INSTANCE;
|
||||
FontMetricsProvider boldFont = Type0FontMetricsProvider.BOLD_INSTANCE;
|
||||
FontMetricsProvider italicFont = Type0FontMetricsProvider.ITALIC_INSTANCE;
|
||||
FontMetricsProvider boldItalicFont = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
|
||||
|
||||
@Getter
|
||||
Map<Integer, AffineTransform> pageCtms;
|
||||
Map<Integer, AffineTransform> resultToPageTransforms;
|
||||
Map<Integer, PageInformation> pageInformation;
|
||||
ImageProcessingPipeline imageProcessingPipeline;
|
||||
OcrServiceSettings settings;
|
||||
ImageProcessingSupervisor imageSupervisor;
|
||||
Set<AzureOcrFeature> features;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public WritableOcrResultFactory(Map<Integer, PageInformation> pageInformation, OcrServiceSettings settings, ImageProcessingSupervisor imageSupervisor) {
|
||||
public WritableOcrResultFactory(Map<Integer, PageInformation> pageInformation,
|
||||
ImageProcessingPipeline imageProcessingPipeline,
|
||||
OcrServiceSettings settings,
|
||||
Set<AzureOcrFeature> features) {
|
||||
|
||||
this.imageProcessingPipeline = imageProcessingPipeline;
|
||||
this.pageInformation = pageInformation;
|
||||
pageCtms = Collections.synchronizedMap(new HashMap<>());
|
||||
resultToPageTransforms = Collections.synchronizedMap(new HashMap<>());
|
||||
this.settings = settings;
|
||||
this.imageSupervisor = imageSupervisor;
|
||||
this.features = features;
|
||||
}
|
||||
|
||||
|
||||
public List<WritableOcrResult> buildOcrResultToWrite(AnalyzeResult analyzeResult, PageBatch pageOffset) throws InterruptedException {
|
||||
public List<WritableOcrResult> buildOcrResultToWrite(AnalyzeResult analyzeResult, PageBatch batch) throws InterruptedException {
|
||||
|
||||
Map<Integer, Double> anglesPerPage = analyzeResult.getPages()
|
||||
.stream()
|
||||
.collect(Collectors.toMap(DocumentPage::getPageNumber, documentPage -> -documentPage.getAngle()));
|
||||
RotationCorrectionUtility.rotatePages(batch.getBatchDoc(), batch.getBatchDoc(), anglesPerPage);
|
||||
|
||||
ImageProcessingSupervisor imageSupervisor = imageProcessingPipeline.addToPipeline(batch);
|
||||
|
||||
List<WritableOcrResult> writableOcrResultList = new ArrayList<>();
|
||||
|
||||
@ -80,16 +95,16 @@ public class WritableOcrResultFactory {
|
||||
|
||||
for (DocumentPage resultPage : analyzeResult.getPages()) {
|
||||
|
||||
PageInformation pageInformation = getPageInformation(getPageNumber(pageOffset, resultPage));
|
||||
AffineTransform pageCtm = getPageCTM(pageInformation, resultPage.getWidth());
|
||||
pageCtms.put(getPageNumber(pageOffset, resultPage), pageCtm);
|
||||
PageInformation pageInformation = getPageInformation(getPageNumber(batch, resultPage));
|
||||
AffineTransform resultToPageTransform = buildResultToPageTransform(pageInformation, resultPage.getWidth());
|
||||
resultToPageTransforms.put(getPageNumber(batch, resultPage), resultToPageTransform);
|
||||
|
||||
List<TextPositionInImage> words = buildTextPositionsInImage(pageOffset, resultPage, pageCtm, lookups, pageInformation);
|
||||
List<TextPositionInImage> words = buildTextPositionsInImage(batch, resultPage, resultToPageTransform, lookups, pageInformation, imageSupervisor);
|
||||
|
||||
var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words);
|
||||
var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words).angle(-resultPage.getAngle());
|
||||
|
||||
if (settings.isDrawTablesAsLines()) {
|
||||
builder.tableLines(getTableLines(analyzeResult, pageInformation, pageCtm));
|
||||
builder.tableLines(getTableLines(analyzeResult, pageInformation, resultToPageTransform));
|
||||
}
|
||||
|
||||
writableOcrResultList.add(builder.build());
|
||||
@ -101,46 +116,74 @@ public class WritableOcrResultFactory {
|
||||
|
||||
private List<TextPositionInImage> buildTextPositionsInImage(PageBatch pageOffset,
|
||||
DocumentPage resultPage,
|
||||
AffineTransform pageCtm,
|
||||
AffineTransform resultToPageTransform,
|
||||
Lookups lookups,
|
||||
PageInformation pageInformation) throws InterruptedException {
|
||||
PageInformation pageInformation,
|
||||
ImageProcessingSupervisor imageSupervisor) throws InterruptedException {
|
||||
|
||||
if (!settings.isFontStyleDetection()) {
|
||||
return buildText(resultPage, pageCtm, lookups, pageInformation);
|
||||
if (!settings.isSnuggify() && !features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
|
||||
return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
|
||||
}
|
||||
|
||||
ImageFile imageFile = imageSupervisor.awaitProcessedPage(getPageNumber(pageOffset, resultPage));
|
||||
|
||||
if (imageFile == null) {
|
||||
return buildText(resultPage, pageCtm, lookups, pageInformation);
|
||||
return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
|
||||
}
|
||||
|
||||
synchronized (ImageProcessingSupervisor.class) {
|
||||
return buildTextWithBoldDetection(resultPage, pageCtm, pageInformation, imageFile);
|
||||
// Leptonica is not thread safe, but is being called in ImageProcessingService as well
|
||||
|
||||
if (features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
|
||||
return buildTextWithBoldDetection(resultPage, resultToPageTransform, pageInformation, imageFile);
|
||||
}
|
||||
|
||||
return buildTextWithSnugBBoxes(resultPage, imageFile, resultToPageTransform, lookups, pageInformation);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage, AffineTransform pageCtm, PageInformation pageInformation, ImageFile imageFile) {
|
||||
// Leptonica is not thread safe, but is being called in ImageProcessingService as well
|
||||
@SneakyThrows
|
||||
private List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage,
|
||||
AffineTransform resultToPageTransform,
|
||||
PageInformation pageInformation,
|
||||
ImageFile imageFile) {
|
||||
|
||||
Pix pageImage = imageFile.readPix();
|
||||
List<TextPositionInImage> words = new ArrayList<>();
|
||||
|
||||
try (FontStyleDetector fontStyleDetector = new FontStyleDetector()) {
|
||||
|
||||
AffineTransform imageTransform = new AffineTransform();
|
||||
double scalingFactor = pageImage.w / resultPage.getWidth();
|
||||
imageTransform.scale(scalingFactor, scalingFactor);
|
||||
AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);
|
||||
|
||||
for (DocumentWord word : resultPage.getWords()) {
|
||||
|
||||
TextPositionInImage textPosition = new TextPositionInImage(word, pageCtm, Type0FontMetricsProvider.REGULAR_INSTANCE, FontStyle.REGULAR);
|
||||
TextPositionInImage textPosition;
|
||||
if (canBeSnuggified(resultPage, resultToImageTransform)) {
|
||||
textPosition = buildTextPositionInImageWithSnugBBox(word,
|
||||
resultToPageTransform,
|
||||
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE),
|
||||
pageImage,
|
||||
resultToImageTransform);
|
||||
} else {
|
||||
textPosition = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
|
||||
word.getContent(),
|
||||
resultToPageTransform,
|
||||
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).font(),
|
||||
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).fontStyle(),
|
||||
false);
|
||||
}
|
||||
|
||||
if (intersectsIgnoreZone(pageInformation.wordBBoxes(), textPosition)) {
|
||||
textPosition.setOverlapsIgnoreZone(true);
|
||||
}
|
||||
|
||||
Pix wordImage = extractWordImage(word, imageTransform, pageImage);
|
||||
QuadPoint originTransformed = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(resultToImageTransform);
|
||||
Pix wordImage = extractWordImage(originTransformed, pageImage);
|
||||
|
||||
if (wordImage == null) {
|
||||
log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
|
||||
continue;
|
||||
}
|
||||
|
||||
IntBuffer pixelCount = IntBuffer.allocate(1);
|
||||
Leptonica1.pixCountPixels(wordImage, pixelCount, null);
|
||||
@ -162,9 +205,31 @@ public class WritableOcrResultFactory {
|
||||
}
|
||||
|
||||
|
||||
private static Pix extractWordImage(DocumentWord word, AffineTransform imageTransform, Pix pageImage) {
|
||||
@SneakyThrows
|
||||
public static AffineTransform buildResultToImageTransform(DocumentPage resultPage, Pix pageImage) {
|
||||
|
||||
int quadrant = RotationCorrectionUtility.getQuadrantRotation(-resultPage.getAngle());
|
||||
AffineTransform rotationCorrection = RotationCorrectionUtility.buildTransform(-resultPage.getAngle(), pageImage.w, pageImage.h);
|
||||
AffineTransform imageTransform = new AffineTransform();
|
||||
double scalingFactor = switch (quadrant) {
|
||||
case 1, 3 -> pageImage.h / resultPage.getWidth();
|
||||
default -> pageImage.w / resultPage.getWidth();
|
||||
};
|
||||
imageTransform.concatenate(rotationCorrection);
|
||||
imageTransform.scale(scalingFactor, scalingFactor);
|
||||
return imageTransform;
|
||||
}
|
||||
|
||||
|
||||
public static Pix extractWordImage(QuadPoint wordPosition, Pix pageImage) {
|
||||
|
||||
Rectangle2D wordBBox = wordPosition.getBounds2D();
|
||||
Rectangle2D pageBBox = new Rectangle2D.Double(0, 0, pageImage.w, pageImage.h);
|
||||
|
||||
if (!pageBBox.contains(wordBBox)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Rectangle2D wordBBox = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(imageTransform).getBounds2D();
|
||||
Box box = new Box((int) wordBBox.getX(), (int) wordBBox.getY(), (int) wordBBox.getWidth(), (int) wordBBox.getHeight(), 1);
|
||||
Pix wordImage = Leptonica1.pixClipRectangle(pageImage, box, null);
|
||||
box.clear();
|
||||
@ -172,19 +237,65 @@ public class WritableOcrResultFactory {
|
||||
}
|
||||
|
||||
|
||||
private List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {
|
||||
public List<TextPositionInImage> buildTextWithSnugBBoxes(DocumentPage resultPage,
|
||||
ImageFile imageFile,
|
||||
AffineTransform pageCtm,
|
||||
Lookups lookups,
|
||||
PageInformation pageInformation) {
|
||||
|
||||
Pix pageImage = imageFile.readPix();
|
||||
AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);
|
||||
|
||||
boolean snuggify = canBeSnuggified(resultPage, resultToImageTransform);
|
||||
|
||||
List<TextPositionInImage> list = new ArrayList<>();
|
||||
for (DocumentWord word : resultPage.getWords()) {
|
||||
|
||||
FontInformation fontInformation = FontInformation.determineStyle(word, lookups);
|
||||
|
||||
TextPositionInImage textPositionInImage;
|
||||
if (snuggify) {
|
||||
textPositionInImage = buildTextPositionInImageWithSnugBBox(word, pageCtm, fontInformation, pageImage, resultToImageTransform);
|
||||
} else {
|
||||
textPositionInImage = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
|
||||
word.getContent(),
|
||||
pageCtm,
|
||||
fontInformation.font(),
|
||||
fontInformation.fontStyle(),
|
||||
false);
|
||||
}
|
||||
markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes());
|
||||
list.add(textPositionInImage);
|
||||
}
|
||||
LeptUtils.disposePix(pageImage);
|
||||
return list;
|
||||
}
|
||||
|
||||
|
||||
private boolean canBeSnuggified(DocumentPage resultPage, AffineTransform resultToImageTransform) {
|
||||
|
||||
return settings.isSnuggify() && BBoxSnuggificationService.canBeSnuggified(resultPage, resultToImageTransform);
|
||||
}
|
||||
|
||||
|
||||
public List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {
|
||||
|
||||
return resultPage.getWords()
|
||||
.stream()
|
||||
.map(word -> buildTextPositionInImage(word, pageCtm, lookups))
|
||||
.map(word -> new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
|
||||
word.getContent(),
|
||||
pageCtm,
|
||||
FontInformation.determineStyle(word, lookups).font(),
|
||||
FontInformation.determineStyle(word, lookups).fontStyle(),
|
||||
false))
|
||||
.map(textPositionInImage -> markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private static int getPageNumber(PageBatch pageOffset, DocumentPage resultPage) {
|
||||
private static int getPageNumber(PageBatch pageBatch, DocumentPage resultPage) {
|
||||
|
||||
return pageOffset.getPageNumber(resultPage.getPageNumber());
|
||||
return pageBatch.getPageNumber(resultPage.getPageNumber());
|
||||
}
|
||||
|
||||
|
||||
@ -214,36 +325,53 @@ public class WritableOcrResultFactory {
|
||||
.flatMap(Collection::stream), Functions.identity());
|
||||
|
||||
return new Lookups(boldLookup, italicLookup, handWrittenLookup);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private TextPositionInImage buildTextPositionInImage(DocumentWord dw, AffineTransform imageCTM, Lookups lookups) {
|
||||
@SneakyThrows
|
||||
private TextPositionInImage buildTextPositionInImageWithSnugBBox(DocumentWord dw,
|
||||
AffineTransform imageCTM,
|
||||
FontInformation fontInformation,
|
||||
Pix pageImage,
|
||||
AffineTransform resultToImageTransform) {
|
||||
|
||||
boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
|
||||
boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
|
||||
boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
|
||||
QuadPoint origin = QuadPoint.fromPolygons(dw.getPolygon());
|
||||
|
||||
FontStyle fontStyle;
|
||||
FontMetricsProvider font;
|
||||
if (handwritten) {
|
||||
fontStyle = FontStyle.HANDWRITTEN;
|
||||
font = regularFont;
|
||||
} else if (italic && bold) {
|
||||
fontStyle = FontStyle.BOLD_ITALIC;
|
||||
font = boldItalicFont;
|
||||
} else if (bold) {
|
||||
fontStyle = FontStyle.BOLD;
|
||||
font = boldFont;
|
||||
} else if (italic) {
|
||||
fontStyle = FontStyle.ITALIC;
|
||||
font = italicFont;
|
||||
} else {
|
||||
fontStyle = FontStyle.REGULAR;
|
||||
font = regularFont;
|
||||
Optional<QuadPoint> snugBBox = BBoxSnuggificationService.snuggify(pageImage, dw, resultToImageTransform);
|
||||
|
||||
return new TextPositionInImage(snugBBox.orElse(origin), dw.getContent(), imageCTM, fontInformation.font(), fontInformation.fontStyle(), snugBBox.isPresent());
|
||||
}
|
||||
|
||||
|
||||
private record FontInformation(FontStyle fontStyle, FontMetricsProvider font) {
|
||||
|
||||
public static FontInformation determineStyle(DocumentWord dw, Lookups lookups) {
|
||||
|
||||
boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
|
||||
boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
|
||||
boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
|
||||
|
||||
FontStyle fontStyle;
|
||||
FontMetricsProvider font;
|
||||
if (handwritten) {
|
||||
fontStyle = FontStyle.HANDWRITTEN;
|
||||
font = Type0FontMetricsProvider.REGULAR_INSTANCE;
|
||||
} else if (italic && bold) {
|
||||
fontStyle = FontStyle.BOLD_ITALIC;
|
||||
font = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
|
||||
} else if (bold) {
|
||||
fontStyle = FontStyle.BOLD;
|
||||
font = Type0FontMetricsProvider.BOLD_INSTANCE;
|
||||
} else if (italic) {
|
||||
fontStyle = FontStyle.ITALIC;
|
||||
font = Type0FontMetricsProvider.ITALIC_INSTANCE;
|
||||
} else {
|
||||
fontStyle = FontStyle.REGULAR;
|
||||
font = Type0FontMetricsProvider.REGULAR_INSTANCE;
|
||||
}
|
||||
return new FontInformation(fontStyle, font);
|
||||
}
|
||||
|
||||
return new TextPositionInImage(dw, imageCTM, font, fontStyle);
|
||||
}
|
||||
|
||||
|
||||
@ -307,7 +435,7 @@ public class WritableOcrResultFactory {
|
||||
}
|
||||
|
||||
|
||||
public static AffineTransform getPageCTM(PageInformation pageInformation, double imageWidth) {
|
||||
public static AffineTransform buildResultToPageTransform(PageInformation pageInformation, double imageWidth) {
|
||||
|
||||
double scalingFactor = calculateScalingFactor(imageWidth, pageInformation);
|
||||
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0);
|
||||
@ -353,7 +481,7 @@ public class WritableOcrResultFactory {
|
||||
}
|
||||
|
||||
|
||||
private record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
|
||||
public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
|
||||
|
||||
public static Lookups empty() {
|
||||
|
||||
|
||||
@ -8,7 +8,7 @@ import lombok.SneakyThrows;
|
||||
|
||||
public interface FontMetricsProvider extends EmbeddableFont {
|
||||
|
||||
default FontMetrics calculateMetrics(String text, double textWidth, double textHeight) {
|
||||
default FontMetrics calculateMetricsForAzureBBox(String text, double textWidth, double textHeight) {
|
||||
|
||||
HeightAndDescent heightAndDescent = calculateHeightAndDescent(text);
|
||||
float fontSize = calculateFontSize(text, textWidth);
|
||||
@ -18,6 +18,16 @@ public interface FontMetricsProvider extends EmbeddableFont {
|
||||
}
|
||||
|
||||
|
||||
default FontMetrics calculateMetricsForTightBBox(String text, double textWidth, double textHeight) {
|
||||
|
||||
HeightAndDescent heightAndDescent = calculateHeightAndDescent(text);
|
||||
float fontSize = calculateFontSize(text, textWidth);
|
||||
float heightScaling = (float) ((textHeight / (heightAndDescent.height() - heightAndDescent.descent())) * 1000) / fontSize;
|
||||
|
||||
return new FontMetrics((heightAndDescent.descent() / 1000) * fontSize, fontSize, heightScaling);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
default float calculateFontSize(String text, double textWidth) {
|
||||
|
||||
|
||||
@ -42,7 +42,7 @@ import lombok.experimental.FieldDefaults;
|
||||
public class IdpLayer extends IdpLayerConfig {
|
||||
|
||||
public static final int LINE_WIDTH = 1;
|
||||
private Map<Integer, AffineTransform> pageCtms;
|
||||
private Map<Integer, AffineTransform> resultToPageTransform;
|
||||
|
||||
|
||||
public void addSection(int pageNumber, DocumentSection section, SpanLookup<DocumentWord> wordsOnPage) {
|
||||
@ -65,7 +65,7 @@ public class IdpLayer extends IdpLayerConfig {
|
||||
|
||||
var sectionsOnPage = getOrCreateVisualizationsOnPage(pageNumber, vis);
|
||||
|
||||
sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(pageCtms.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
|
||||
sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(resultToPageTransform.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
|
||||
}
|
||||
|
||||
|
||||
@ -89,15 +89,20 @@ public class IdpLayer extends IdpLayerConfig {
|
||||
if (keyValue.getValue() != null) {
|
||||
addBoundingRegion(keyValue.getValue().getBoundingRegions(), keyValuePairs, VALUE_COLOR, pageOffset);
|
||||
|
||||
if (keyValue.getKey().getBoundingRegions().get(0).getPageNumber() != keyValue.getValue().getBoundingRegions().get(0).getPageNumber()) {
|
||||
if (keyValue.getKey().getBoundingRegions()
|
||||
.get(0).getPageNumber() != keyValue.getValue().getBoundingRegions()
|
||||
.get(0).getPageNumber()) {
|
||||
return;
|
||||
}
|
||||
int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions().get(0).getPageNumber());
|
||||
QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions().get(0).getPolygon());
|
||||
QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions().get(0).getPolygon());
|
||||
int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions()
|
||||
.get(0).getPageNumber());
|
||||
QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions()
|
||||
.get(0).getPolygon());
|
||||
QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions()
|
||||
.get(0).getPolygon());
|
||||
|
||||
var line = LineUtils.findClosestMidpointLine(key, value);
|
||||
line = LineUtils.transform(line, pageCtms.get(pageNumberWithOffset));
|
||||
line = LineUtils.transform(line, resultToPageTransform.get(pageNumberWithOffset));
|
||||
var arrowHead = LineUtils.createArrowHead(line, Math.min(LineUtils.length(line), 5));
|
||||
var linesOnPage = getOrCreateVisualizationsOnPage(pageNumberWithOffset, keyValuePairs).getColoredLines();
|
||||
linesOnPage.add(new ColoredLine(line, KEY_VALUE_BBOX_COLOR, LINE_WIDTH));
|
||||
@ -142,7 +147,7 @@ public class IdpLayer extends IdpLayerConfig {
|
||||
private void addPolygon(int pageNumber, List<Double> polygon, Visualizations visualizations, Color color) {
|
||||
|
||||
VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, visualizations);
|
||||
visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(pageCtms.get(pageNumber)), color));
|
||||
visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(resultToPageTransform.get(pageNumber)), color));
|
||||
}
|
||||
|
||||
|
||||
@ -181,7 +186,8 @@ public class IdpLayer extends IdpLayerConfig {
|
||||
|
||||
var vis = getOrCreateVisualizationsOnPage(pageOffset.getPageNumber(boundingRegion.getPageNumber()), tables);
|
||||
|
||||
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon()).getTransformed(pageCtms.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));
|
||||
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon())
|
||||
.getTransformed(resultToPageTransform.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));
|
||||
|
||||
vis.getFilledRectangles().add(new FilledRectangle(qp.getBounds2D(), TITLE_COLOR, 0.2f));
|
||||
|
||||
|
||||
@ -20,9 +20,9 @@ public class IdpLayerFactory {
|
||||
private final IdpLayer idpLayer;
|
||||
|
||||
|
||||
IdpLayerFactory(Map<Integer, AffineTransform> pageCtms) {
|
||||
IdpLayerFactory(Map<Integer, AffineTransform> resultToPageTransform) {
|
||||
|
||||
this.idpLayer = new IdpLayer(pageCtms);
|
||||
this.idpLayer = new IdpLayer(resultToPageTransform);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,17 +1,21 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.AnalyzeResult;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.OcrExecutionSupervisor;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -26,32 +30,43 @@ public class LayerFactory {
|
||||
OcrDebugLayerFactory ocrDebugLayerFactory;
|
||||
OcrTextLayerFactory ocrTextLayerFactory;
|
||||
OcrServiceSettings settings;
|
||||
Set<AzureOcrFeature> features;
|
||||
Map<Integer, Double> angles;
|
||||
|
||||
|
||||
public LayerFactory(OcrServiceSettings settings, OcrExecutionSupervisor supervisor, ImageProcessingSupervisor imageSupervisor, Map<Integer, PageInformation> pageInformation) {
|
||||
public LayerFactory(OcrServiceSettings settings,
|
||||
Set<AzureOcrFeature> features,
|
||||
OcrExecutionSupervisor supervisor,
|
||||
Map<Integer, PageInformation> pageInformation,
|
||||
ImageProcessingPipeline imageProcessingPipeline) {
|
||||
|
||||
this.writableOcrResultFactory = new WritableOcrResultFactory(pageInformation, settings, imageSupervisor);
|
||||
this.idpLayerFactory = new IdpLayerFactory(writableOcrResultFactory.getPageCtms());
|
||||
this.writableOcrResultFactory = new WritableOcrResultFactory(pageInformation, imageProcessingPipeline, settings, features);
|
||||
this.idpLayerFactory = new IdpLayerFactory(writableOcrResultFactory.getResultToPageTransforms());
|
||||
this.ocrDebugLayerFactory = new OcrDebugLayerFactory();
|
||||
this.ocrTextLayerFactory = new OcrTextLayerFactory();
|
||||
this.settings = settings;
|
||||
this.features = features;
|
||||
this.supervisor = supervisor;
|
||||
this.angles = Collections.synchronizedMap(new HashMap<>());
|
||||
}
|
||||
|
||||
|
||||
public void addAnalyzeResult(PageBatch pageRange, AnalyzeResult analyzeResult) throws InterruptedException {
|
||||
public void processAnalyzeResult(PageBatch batch, AnalyzeResult analyzeResult) throws InterruptedException {
|
||||
|
||||
List<WritableOcrResult> results = writableOcrResultFactory.buildOcrResultToWrite(analyzeResult, batch);
|
||||
|
||||
results.forEach(result -> angles.put(result.getPageNumber(), result.getAngle()));
|
||||
|
||||
List<WritableOcrResult> results = writableOcrResultFactory.buildOcrResultToWrite(analyzeResult, pageRange);
|
||||
ocrTextLayerFactory.addWritableOcrResult(results);
|
||||
|
||||
if (settings.isDebug()) {
|
||||
ocrDebugLayerFactory.addAnalysisResult(results);
|
||||
}
|
||||
if (settings.isIdpEnabled()) {
|
||||
idpLayerFactory.addAnalyzeResult(analyzeResult, pageRange);
|
||||
if (features.contains(AzureOcrFeature.IDP)) {
|
||||
idpLayerFactory.addAnalyzeResult(analyzeResult, batch);
|
||||
}
|
||||
|
||||
this.supervisor.finishMappingResult(pageRange);
|
||||
this.supervisor.finishMappingResult(batch);
|
||||
}
|
||||
|
||||
|
||||
@ -64,10 +79,10 @@ public class LayerFactory {
|
||||
if (settings.isDebug()) {
|
||||
debugLayers.add(ocrDebugLayerFactory.getOcrDebugLayer());
|
||||
}
|
||||
if (settings.isIdpEnabled()) {
|
||||
if (features.contains(AzureOcrFeature.IDP)) {
|
||||
debugLayers.add(idpLayerFactory.getIdpLayer());
|
||||
}
|
||||
return new OcrResult(List.of(ocrTextLayer), debugLayers);
|
||||
return new OcrResult(List.of(ocrTextLayer), debugLayers, angles);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
|
||||
word.getFontMetricsProvider(),
|
||||
Optional.of(word.getTextMatrix()),
|
||||
Optional.of(RenderingMode.FILL)));
|
||||
bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox()));
|
||||
bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox(), word.isSnugBBox()));
|
||||
}
|
||||
|
||||
|
||||
@ -57,4 +57,11 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isVisibleByDefault() {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
|
||||
|
||||
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers) {
|
||||
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers, Map<Integer, Double> anglesPerPage) {
|
||||
|
||||
}
|
||||
@ -14,12 +14,19 @@ import lombok.experimental.UtilityClass;
|
||||
@UtilityClass
|
||||
public class LineUtils {
|
||||
|
||||
public List<ColoredLine> quadPointAsLines(QuadPoint rect) {
|
||||
public List<ColoredLine> quadPointAsLines(QuadPoint rect, boolean tight) {
|
||||
|
||||
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.ORANGE, 1),
|
||||
if (tight) {
|
||||
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.GREEN, 1),
|
||||
new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.GREEN, 1),
|
||||
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
|
||||
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.GREEN, 1));
|
||||
}
|
||||
|
||||
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.BLUE, 1),
|
||||
new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1),
|
||||
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
|
||||
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1));
|
||||
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.BLUE, 1),
|
||||
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.BLUE, 1));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,205 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.visualizations.utils;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.sdf.Obj;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class RotationCorrectionUtility {
|
||||
|
||||
public static final LayerIdentifier KNECON_ROTATION_CORRECTION = new LayerIdentifier(null, "ROTATION_CORRECTION");
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void rotatePages(Path inputFile, Path outputFile, Map<Integer, Double> anglesPerPage) {
|
||||
|
||||
Path tmp = Files.createTempFile("tempDocument", ".pdf");
|
||||
Files.copy(inputFile, tmp, StandardCopyOption.REPLACE_EXISTING);
|
||||
try (var in = new FileInputStream(tmp.toFile()); var out = new FileOutputStream(outputFile.toFile())) {
|
||||
rotatePages(in, out, anglesPerPage);
|
||||
}
|
||||
Files.deleteIfExists(tmp);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void rotatePages(InputStream in, OutputStream out, Map<Integer, Double> anglesPerPage) {
|
||||
|
||||
try (PDFDoc doc = new PDFDoc(in)) {
|
||||
anglesPerPage.forEach((pageNumber, angle) -> rotatePage(pageNumber, doc, angle));
|
||||
doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void rotatePage(int pageNumber, PDFDoc doc, double angle) {
|
||||
|
||||
int quadrants = getQuadrantRotation(angle);
|
||||
Page page = doc.getPage(pageNumber);
|
||||
page.setRotation((quadrants + page.getRotation()) % 4);
|
||||
double remainingAngle = getRemainingAngle(angle, quadrants);
|
||||
|
||||
Obj contents = page.getContents();
|
||||
String content = buildRotationContent(remainingAngle, page);
|
||||
Obj rotationStream = doc.createIndirectStream(content.getBytes());
|
||||
Obj newContentsArray = doc.createIndirectArray();
|
||||
newContentsArray.pushBack(rotationStream);
|
||||
addPreviousContents(contents, newContentsArray);
|
||||
String closingContent = buildClosingContent();
|
||||
Obj closingStream = doc.createIndirectStream(closingContent.getBytes());
|
||||
newContentsArray.pushBack(closingStream);
|
||||
page.getSDFObj().erase("Contents");
|
||||
page.getSDFObj().put("Contents", newContentsArray);
|
||||
}
|
||||
|
||||
|
||||
private String buildClosingContent() {
|
||||
|
||||
List<String> closingCommands = new LinkedList<>();
|
||||
closingCommands.add("Q");
|
||||
return String.join("\n", closingCommands);
|
||||
}
|
||||
|
||||
|
||||
private String buildRotationContent(double angle, Page page) throws PDFNetException {
|
||||
|
||||
List<String> commands = new LinkedList<>();
|
||||
|
||||
double scale = getScalingFactor(angle, page);
|
||||
commands.add("q");
|
||||
commands.add("/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName()));
|
||||
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(page.getPageWidth() / 2, page.getPageHeight() / 2)));
|
||||
commands.add(buildMatrixCommands(AffineTransform.getRotateInstance(Math.toRadians(angle))));
|
||||
commands.add(buildMatrixCommands(AffineTransform.getScaleInstance(scale, scale)));
|
||||
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-page.getPageWidth() / 2, -page.getPageHeight() / 2)));
|
||||
commands.add("EMC");
|
||||
return String.join("\n", commands);
|
||||
}
|
||||
|
||||
|
||||
private void addPreviousContents(Obj contents, Obj newContentsArray) throws PDFNetException {
|
||||
|
||||
switch (contents.getType()) {
|
||||
case Obj.e_array -> {
|
||||
for (int i = 0; i < contents.size(); i++) {
|
||||
newContentsArray.pushBack(contents.getAt(i));
|
||||
}
|
||||
}
|
||||
case Obj.e_stream -> newContentsArray.pushBack(contents);
|
||||
default -> throw new IllegalStateException("Unexpected value: " + contents.getType());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static double getScalingFactor(double angle, Page page) throws PDFNetException {
|
||||
|
||||
double width = page.getPageWidth();
|
||||
double height = page.getPageHeight();
|
||||
return getScalingFactor(angle, width, height);
|
||||
}
|
||||
|
||||
|
||||
public static double getScalingFactor(double angle, double w, double h) {
|
||||
|
||||
if (Math.abs(angle) < 20) {
|
||||
return 1;
|
||||
}
|
||||
double sin = Math.abs(Math.sin(Math.toRadians(angle)));
|
||||
double cos = Math.abs(Math.cos(Math.toRadians(angle)));
|
||||
double newWidth = w * cos + h * sin;
|
||||
double newHeight = h * cos + w * sin;
|
||||
return Math.min(w / newWidth, h / newHeight);
|
||||
}
|
||||
|
||||
|
||||
public static AffineTransform buildTransform(double angle, double originalWidth, double originalHeight) {
|
||||
|
||||
int quadrants = getQuadrantRotation(angle);
|
||||
|
||||
double h = originalHeight;
|
||||
double w = originalWidth;
|
||||
|
||||
if (quadrants == 1 || quadrants == 3) {
|
||||
w = originalHeight;
|
||||
h = originalWidth;
|
||||
}
|
||||
|
||||
AffineTransform quadrantRotation = switch (quadrants) {
|
||||
case 1 -> new AffineTransform(0, 1, -1, 0, h, 0);
|
||||
case 2 -> new AffineTransform(-1, 0, 0, -1, w, h);
|
||||
case 3 -> new AffineTransform(0, -1, 1, 0, w - h, h);
|
||||
default -> new AffineTransform();
|
||||
};
|
||||
|
||||
double remainder = getRemainingAngle(angle, quadrants);
|
||||
double scale = getScalingFactor(remainder, w, h);
|
||||
|
||||
AffineTransform transform = new AffineTransform();
|
||||
transform.translate(w / 2, h / 2);
|
||||
transform.rotate(Math.toRadians(remainder));
|
||||
transform.scale(scale, scale);
|
||||
transform.translate(-w / 2, -h / 2);
|
||||
transform.concatenate(quadrantRotation);
|
||||
|
||||
return transform;
|
||||
}
|
||||
|
||||
|
||||
public static int getQuadrantRotation(double angle) {
|
||||
|
||||
double remainder = angle % 360;
|
||||
|
||||
if (remainder < 0) {
|
||||
remainder += 360;
|
||||
}
|
||||
|
||||
if (remainder > 315 || remainder <= 45) {
|
||||
return 0;
|
||||
} else if (remainder > 45 && remainder <= 135) {
|
||||
return 1;
|
||||
} else if (remainder > 135 && remainder <= 225) {
|
||||
return 2;
|
||||
} else {
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static double getRemainingAngle(double angle, int quadrants) {
|
||||
|
||||
double referenceAngle = 90 * quadrants;
|
||||
return angle - referenceAngle;
|
||||
}
|
||||
|
||||
|
||||
public static double getRemainingAngle(double angle) {
|
||||
|
||||
return getRemainingAngle(angle, getQuadrantRotation(angle));
|
||||
}
|
||||
|
||||
|
||||
private String buildMatrixCommands(AffineTransform at) {
|
||||
|
||||
return "%f %f %f %f %f %f cm".formatted(at.getScaleX(), at.getShearX(), at.getShearY(), at.getScaleY(), at.getTranslateX(), at.getTranslateY());
|
||||
}
|
||||
|
||||
}
|
||||
@ -4,8 +4,8 @@ import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@ -13,11 +13,16 @@ import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.GhostScriptService;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingService;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
|
||||
import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import com.sun.jna.NativeLibrary;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
@ -31,10 +36,7 @@ class ImageProcessingPipelineTest {
|
||||
@BeforeEach
|
||||
public void setup() {
|
||||
|
||||
System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB"));
|
||||
try (NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica")) {
|
||||
assert leptonicaLib != null;
|
||||
}
|
||||
new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a").init();
|
||||
|
||||
ImageProcessingService imageProcessingService = new ImageProcessingService();
|
||||
GhostScriptService ghostScriptService = new GhostScriptService();
|
||||
@ -46,7 +48,7 @@ class ImageProcessingPipelineTest {
|
||||
@SneakyThrows
|
||||
public void testImageProcessingPipeline() {
|
||||
|
||||
String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340.pdf";
|
||||
String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340/VV-331340_OCRED_first15.pdf";
|
||||
|
||||
File file;
|
||||
if (fileName.startsWith("files")) {
|
||||
@ -63,21 +65,26 @@ class ImageProcessingPipelineTest {
|
||||
|
||||
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
int numberOfpages;
|
||||
try (var doc = Loader.loadPDF(file)) {
|
||||
numberOfpages = doc.getNumberOfPages();
|
||||
}
|
||||
Set<Integer> pageNumbers = new HashSet<>();
|
||||
for (int i = 1; i <= numberOfpages; i++) {
|
||||
if (i % 2 == 0) {
|
||||
continue;
|
||||
try (var doc = new PDFDoc(fileName)) {
|
||||
List<Integer> pageNumbers = new LinkedList<>();
|
||||
for (int i = 1; i <= doc.getPageCount(); i++) {
|
||||
if (i % 2 == 0) {
|
||||
continue;
|
||||
}
|
||||
pageNumbers.add(i);
|
||||
}
|
||||
pageNumbers.add(i);
|
||||
PageBatch batch = BatchFactory.create(0, doc, pageNumbers, tmpDir);
|
||||
|
||||
ImageProcessingSupervisor supervisor = imageProcessingPipeline.addToPipeline(batch);
|
||||
|
||||
batch.forEach(pageNumber -> {
|
||||
try {
|
||||
assert supervisor.awaitProcessedPage(pageNumber) != null;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
ImageProcessingSupervisor supervisor = imageProcessingPipeline.run(pageNumbers, tmpDir.resolve("images"), documentFile.toFile());
|
||||
|
||||
supervisor.awaitAll();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,70 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.service;
|
||||
|
||||
import static com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility.KNECON_ROTATION_CORRECTION;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
|
||||
import com.knecon.fforesight.service.viewerdoc.service.PageContentCleaner;
|
||||
import com.pdftron.pdf.ElementReader;
|
||||
import com.pdftron.pdf.ElementWriter;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.PageIterator;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Disabled // leptonica is not available in build server
|
||||
public class PageRotationTest {
|
||||
|
||||
/**
 * Initializes the PDFTron SDK once before any test in this class runs.
 * NOTE(review): the license key is embedded in the source - confirm this is intended.
 */
@BeforeAll
|
||||
public static void setUp() {
|
||||
|
||||
PDFNet.initialize("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void putRotation() {
|
||||
|
||||
Map<Integer, Double> angles = new HashMap<>();
|
||||
for (int i = 1; i <= 100; i++) {
|
||||
double a = -90 + (i * ((double) 180 / 100));
|
||||
angles.put(i, a);
|
||||
}
|
||||
Path inputFile = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
|
||||
RotationCorrectionUtility.rotatePages(inputFile, Path.of("/tmp").resolve(inputFile.getFileName() + "_rotated.pdf"), angles);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void removeRotation() {
|
||||
|
||||
Path inputFile = Path.of("/tmp/VV-331340-first100.pdf_rotated.pdf");
|
||||
try (var doc = new PDFDoc(inputFile.toFile()
|
||||
.toString()); var reader = new ElementReader(); var writer = new ElementWriter(); PageIterator pageIterator = doc.getPageIterator()) {
|
||||
PageContentCleaner cleaner = PageContentCleaner.builder()
|
||||
.reader(reader)
|
||||
.writer(writer)
|
||||
.markedContentToRemove(Set.of(KNECON_ROTATION_CORRECTION.markedContentName()))
|
||||
.build();
|
||||
|
||||
while (pageIterator.hasNext()) {
|
||||
Page page = pageIterator.next();
|
||||
cleaner.removeMarkedContent(page);
|
||||
}
|
||||
doc.save(inputFile.resolveSibling(inputFile.getFileName() + "_derotated.pdf").toFile().toString(), SDFDoc.SaveMode.LINEARIZED, null);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,246 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.service;
|
||||
|
||||
import static com.knecon.fforesight.service.ocr.processor.service.OCRService.IMAGE_PIPELINE_DIR;
|
||||
import static com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline.PROCESSED_DIR;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.AnalyzeResult;
|
||||
import com.azure.json.JsonOptions;
|
||||
import com.azure.json.JsonReader;
|
||||
import com.azure.json.implementation.DefaultJsonReader;
|
||||
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrDebugLayerFactory;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
|
||||
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
|
||||
import com.pdftron.pdf.ColorPt;
|
||||
import com.pdftron.pdf.ColorSpace;
|
||||
import com.pdftron.pdf.Element;
|
||||
import com.pdftron.pdf.ElementBuilder;
|
||||
import com.pdftron.pdf.ElementWriter;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.sun.jna.Memory;
|
||||
import com.sun.jna.Native;
|
||||
import com.sun.jna.Pointer;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import net.sourceforge.lept4j.Box;
|
||||
import net.sourceforge.lept4j.Boxa;
|
||||
import net.sourceforge.lept4j.util.LeptUtils;
|
||||
|
||||
@Disabled // leptonica is not available in build server
|
||||
public class SnugBoxesTest {
|
||||
|
||||
public static final int PAGE_NUMBER = 41;
|
||||
public static final Path ORIGIN_FILE = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
|
||||
public static final Path TEST_FOLDER = Path.of("/tmp/OCR_TEST/").resolve(ORIGIN_FILE.getFileName());
|
||||
public static final Path PROCESSED_FOLDER = TEST_FOLDER.resolve(IMAGE_PIPELINE_DIR).resolve(PROCESSED_DIR);
|
||||
public static final Path DESTINATION_FILE = TEST_FOLDER.resolve("SnugBoxesTest.pdf");
|
||||
public static final Path RESULT_FILE = TEST_FOLDER.resolve(IMAGE_PIPELINE_DIR).resolve("azure_result_0.json");
|
||||
|
||||
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
||||
|
||||
|
||||
/**
 * Runs the native library initializer once before any test in this class.
 * NOTE(review): the license key is embedded in the source - confirm this is intended.
 */
@BeforeAll
|
||||
public static void setUp() {
|
||||
|
||||
new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a").init();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void snugBoxes() {
|
||||
|
||||
String filePath = ORIGIN_FILE.toFile().toString();
|
||||
File file = PROCESSED_FOLDER.resolve("output_0.%04d.tiff".formatted(PAGE_NUMBER)).toFile();
|
||||
assert file.exists();
|
||||
ImageFile imageFile = new ImageFile(PAGE_NUMBER, file.toString());
|
||||
AnalyzeResult result = null;
|
||||
try (var in = new FileInputStream(RESULT_FILE.toFile()); JsonReader reader = DefaultJsonReader.fromStream(in, new JsonOptions());) {
|
||||
result = AnalyzeResult.fromJson(reader);
|
||||
}
|
||||
|
||||
var resultPage = result.getPages()
|
||||
.get(PAGE_NUMBER - 1);
|
||||
WritableOcrResultFactory writableOcrResultFactory = new WritableOcrResultFactory(null, null, new OcrServiceSettings(), Set.of());
|
||||
OcrDebugLayerFactory debugLayerFactory = new OcrDebugLayerFactory();
|
||||
InvisibleElementRemovalService invisibleElementRemovalService = new InvisibleElementRemovalService();
|
||||
try (var in = new FileInputStream(ORIGIN_FILE.toFile()); var out = new FileOutputStream(DESTINATION_FILE.toFile())) {
|
||||
invisibleElementRemovalService.removeInvisibleElements(in, out, false);
|
||||
}
|
||||
PageInformation pageInformation = getPageInformation(PAGE_NUMBER, DESTINATION_FILE.toFile().toString());
|
||||
WritableOcrResultFactory.Lookups empty = WritableOcrResultFactory.Lookups.empty();
|
||||
|
||||
AffineTransform pageCtm = getPageCtm(PAGE_NUMBER, filePath, resultPage.getWidth());
|
||||
// pageCtm.preConcatenate(rotationCorrection);
|
||||
// pageCtm.preConcatenate(quadrantTransform);
|
||||
// Pix pageImage = imageFile.readPix();
|
||||
// AffineTransform imageTransform = WritableOcrResultFactory.buildImageTransform(resultPage, pageImage);
|
||||
// List<Rectangle2D> rects = new LinkedList<>();
|
||||
// for (DocumentWord word : resultPage.getWords()) {
|
||||
// QuadPoint quadPoint = QuadPoint.fromPolygons(word.getPolygon());
|
||||
// Rectangle2D rect = quadPoint.getTransformed(imageTransform).getBounds2D();
|
||||
// if (rect.getX() > 0 && rect.getY() > 0 && rect.getMaxX() < pageImage.w && rect.getMaxY() < pageImage.h) {
|
||||
// rects.add(rect);
|
||||
// }
|
||||
// }
|
||||
// Boxa boxa = createBoxaFromRectangles(rects);
|
||||
// Pix drawedPix = Leptonica1.pixDrawBoxa(pageImage, boxa, 5, 1);
|
||||
// Leptonica1.pixWrite("/tmp/OCR_TEST/VV-331340-first100.pdf/image_pipeline/page_" + PAGE_NUMBER + ".tiff", drawedPix, 5);
|
||||
|
||||
//
|
||||
|
||||
List<TextPositionInImage> words = writableOcrResultFactory.buildTextWithSnugBBoxes(resultPage, imageFile, pageCtm, empty, pageInformation);
|
||||
var results = new WritableOcrResult(PAGE_NUMBER, -resultPage.getAngle(), words, Collections.emptyList());
|
||||
debugLayerFactory.addAnalysisResult(List.of(results));
|
||||
|
||||
// try (var doc = new PDFDoc(tmpFile.toString()); var out = new FileOutputStream(DESTINATION_FILE.toFile())) {
|
||||
// PageRotationHelper.rotatePage(PAGE_NUMBER, doc, -resultPage.getAngle());
|
||||
// var rects = resultPage.getWords()
|
||||
// .stream()
|
||||
// .map(DocumentWord::getPolygon)
|
||||
// .map(QuadPoint::fromPolygons)
|
||||
// .map(qp -> qp.getTransformed(pageCtm))
|
||||
// .toList();
|
||||
// drawRects(doc, rects, PAGE_NUMBER);
|
||||
// doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
// }
|
||||
// Files.deleteIfExists(tmpFile);
|
||||
|
||||
viewerDocumentService.addLayerGroups(DESTINATION_FILE.toFile(), DESTINATION_FILE.toFile(), List.of(debugLayerFactory.getOcrDebugLayer()));
|
||||
RotationCorrectionUtility.rotatePages(DESTINATION_FILE, DESTINATION_FILE, Map.of(PAGE_NUMBER, -resultPage.getAngle()));
|
||||
}
|
||||
|
||||
//
|
||||
// private static List<Rectangle2D> readRectsFromBoxa(Boxa boxa) {
|
||||
//
|
||||
// Pointer[] pointers = boxa.box.getPointer().getPointerArray(0, boxa.n);
|
||||
// List<Rectangle2D> boxes = new ArrayList<>(boxa.n);
|
||||
// for (int i = 0; i < boxa.n; i++) {
|
||||
// Box box = new Box(pointers[i]);
|
||||
// boxes.add(new Rectangle2D.Double(box.x, box.y, box.w, box.h));
|
||||
// LeptUtils.dispose(box);
|
||||
// }
|
||||
// return boxes;
|
||||
// }
|
||||
//
|
||||
//
|
||||
// @SuppressWarnings("PMD") // Memory will be de-allocated with boxa
|
||||
// public static Boxa createBoxaFromRectangles(List<Rectangle2D> rectangles) {
|
||||
//
|
||||
// if (rectangles.isEmpty()) {
|
||||
// return new Boxa();
|
||||
// }
|
||||
//
|
||||
// int n = rectangles.size(); // Number of rectangles
|
||||
// int nalloc = n; // Allocating memory for exactly 'n' boxes
|
||||
// int refcount = 1; // Default refcount
|
||||
//
|
||||
// Pointer boxPointerArray = new Memory((long) Native.POINTER_SIZE * n); // Memory for n pointers
|
||||
//
|
||||
// for (int i = 0; i < n; i++) {
|
||||
//
|
||||
// Rectangle2D rect = rectangles.get(i);
|
||||
// var mem = new Memory(20L);
|
||||
// mem.setInt(0, (int) rect.getX());
|
||||
// mem.setInt(4, (int) rect.getY());
|
||||
// mem.setInt(8, (int) rect.getWidth());
|
||||
// mem.setInt(12, (int) rect.getHeight());
|
||||
// mem.setInt(16, refcount);
|
||||
//
|
||||
// // Write the pointer of each Box into the native memory
|
||||
// boxPointerArray.setPointer((long) Native.POINTER_SIZE * i, mem);
|
||||
// }
|
||||
//
|
||||
// // Create a PointerByReference pointing to the native memory of the array
|
||||
// PointerByReference boxPointerRef = new PointerByReference();
|
||||
// boxPointerRef.setPointer(boxPointerArray);
|
||||
//
|
||||
// // Create the Boxa instance
|
||||
//
|
||||
// return new Boxa(n, nalloc, refcount, boxPointerRef);
|
||||
// }
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void drawRects(PDFDoc doc, List<QuadPoint> quadPoints, int pageNumber) {
|
||||
|
||||
try (ElementWriter writer = new ElementWriter(); ElementBuilder builder = new ElementBuilder()) {
|
||||
Page page = doc.getPage(pageNumber);
|
||||
writer.begin(page, ElementWriter.e_overlay);
|
||||
for (QuadPoint quadPoint : quadPoints) {
|
||||
quadPoint.asLines()
|
||||
.forEach(line -> {
|
||||
drawLine(line, builder, writer);
|
||||
});
|
||||
}
|
||||
writer.end();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void drawLine(Line2D l, ElementBuilder builder, ElementWriter writer) {
|
||||
|
||||
float[] rgbComponents = Color.BLUE.getRGBColorComponents(null);
|
||||
|
||||
builder.pathBegin();
|
||||
builder.moveTo(l.getX1(), l.getY1());
|
||||
builder.lineTo(l.getX2(), l.getY2());
|
||||
Element line = builder.pathEnd();
|
||||
|
||||
line.setPathStroke(true);
|
||||
line.setPathFill(false);
|
||||
line.getGState().setLineWidth(1);
|
||||
line.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
|
||||
|
||||
try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
|
||||
line.getGState().setStrokeColor(color);
|
||||
}
|
||||
writer.writeElement(line);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static AffineTransform getPageCtm(int pageNumber, String file, double imageWidh) {
|
||||
|
||||
return WritableOcrResultFactory.buildResultToPageTransform(getPageInformation(pageNumber, file), imageWidh);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static PageInformation getPageInformation(int pageNumber, String file) {
|
||||
|
||||
try (var in = new FileInputStream(file); var doc = new PDFDoc(in)) {
|
||||
return PageInformation.fromPage(pageNumber, doc.getPage(pageNumber));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -20,7 +20,7 @@ class Type0FontMetricsProviderTest {
|
||||
|
||||
try (PDDocument document = Loader.loadPDF(new File(Type0FontMetricsProviderTest.class.getClassLoader().getResource("InvisibleText.pdf").getPath()))) {
|
||||
Type0FontMetricsProvider metricsFactory = Type0FontMetricsProvider.regular(document);
|
||||
FontMetrics fontMetrics = metricsFactory.calculateMetrics("deine mutter", 100, 50);
|
||||
FontMetrics fontMetrics = metricsFactory.calculateMetricsForAzureBBox("deine mutter", 100, 50);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -8,6 +8,9 @@ plugins {
|
||||
id("org.sonarqube") version "4.3.0.3225"
|
||||
id("io.freefair.lombok") version "8.4"
|
||||
}
|
||||
pmd {
|
||||
isConsoleOutput = true
|
||||
}
|
||||
|
||||
configurations {
|
||||
all {
|
||||
|
||||
@ -7,6 +7,7 @@ import java.nio.file.Path;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
|
||||
import org.slf4j.MDC;
|
||||
import org.springframework.amqp.AmqpRejectAndDontRequeueException;
|
||||
import org.springframework.amqp.core.Message;
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
|
||||
@ -54,8 +55,9 @@ public class OcrMessageReceiver {
|
||||
Path tmpDir = Files.createTempDirectory(null);
|
||||
|
||||
try {
|
||||
MDC.put("fileId", fileId);
|
||||
log.info("--------------------------------------------------------------------------");
|
||||
log.info("Start ocr for file with dossierId {} and fileId {}", dossierId, fileId);
|
||||
log.info("Starting OCR");
|
||||
|
||||
ocrMessageSender.sendOCRStarted(fileId);
|
||||
|
||||
@ -65,7 +67,7 @@ public class OcrMessageReceiver {
|
||||
|
||||
fileStorageService.downloadFiles(request, documentFile);
|
||||
|
||||
ocrService.runOcrOnDocument(dossierId, fileId, request.isRemoveWatermark(), tmpDir, documentFile, viewerDocumentFile, analyzeResultFile);
|
||||
ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), tmpDir, documentFile, viewerDocumentFile, analyzeResultFile);
|
||||
|
||||
fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, analyzeResultFile);
|
||||
|
||||
@ -76,6 +78,7 @@ public class OcrMessageReceiver {
|
||||
in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER, OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
MDC.remove("fileId");
|
||||
FileSystemUtils.deleteRecursively(tmpDir);
|
||||
}
|
||||
}
|
||||
|
||||
@ -7,11 +7,21 @@
|
||||
<include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
|
||||
|
||||
<appender name="JSON" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder class="net.logstash.logback.encoder.LogstashEncoder"/>
|
||||
<encoder class="net.logstash.logback.encoder.LogstashEncoder">
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level%logger{36} - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level%logger{36} - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<root level="INFO">
|
||||
<appender-ref ref="${logType}"/>
|
||||
</root>
|
||||
|
||||
<logger name="com.iqser.red.pdftronlogic.commons" level="ERROR"/>
|
||||
|
||||
</configuration>
|
||||
@ -8,7 +8,9 @@ import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.amqp.rabbit.core.RabbitAdmin;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.amqp.rabbit.listener.RabbitListenerEndpointRegistry;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
@ -52,8 +54,13 @@ public class AbstractTest {
|
||||
@MockBean
|
||||
protected RabbitTemplate rabbitTemplate;
|
||||
|
||||
private static String pdftronLicense;
|
||||
@MockBean
|
||||
private RabbitAdmin rabbitAdmin;
|
||||
|
||||
@MockBean
|
||||
private RabbitListenerEndpointRegistry rabbitListenerEndpointRegistry;
|
||||
|
||||
private static String pdftronLicense;
|
||||
|
||||
@BeforeEach
|
||||
public void openMocks() {
|
||||
|
||||
@ -9,23 +9,28 @@ import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.MDC;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.processor.service.OCRService;
|
||||
import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Disabled // in order to run, the azure.key must be set first in the application.yml
|
||||
@Disabled
|
||||
// in order to run, the azure.key must be set first in the application.yml and you must set the env variable VCPKG_DYNAMIC_LIB to your tesseract and leptonica installation folder
|
||||
@SpringBootTest()
|
||||
public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
|
||||
public static final Set<AzureOcrFeature> FEATURES = Set.of(AzureOcrFeature.ROTATION_CORRECTION, AzureOcrFeature.FONT_STYLE_DETECTION);
|
||||
@Autowired
|
||||
private OCRService ocrService;
|
||||
|
||||
@ -34,7 +39,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
@SneakyThrows
|
||||
public void testOcrWith2000PageFile() {
|
||||
|
||||
testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
|
||||
testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/brokenText.pdf");
|
||||
}
|
||||
|
||||
|
||||
@ -50,7 +55,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
@SneakyThrows
|
||||
public void testOcrWithFile() {
|
||||
|
||||
testOCR("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/1.A16148F - Toxicidade oral aguda.pdf");
|
||||
testOCR("/home/kschuettler/Dokumente/LayoutparsingEvaluation/RAW_FILES/Difficult Headlines/VV-284053.pdf/VV-284053.pdf.ORIGIN.pdf");
|
||||
}
|
||||
|
||||
|
||||
@ -58,7 +63,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
@SneakyThrows
|
||||
public void testOcrWithFolder() {
|
||||
|
||||
String dir = "/home/kschuettler/Dokumente/TestFiles/BASF/Documine_Test_docs/2013-1110704.pdf";
|
||||
String dir = "/home/kschuettler/Dokumente/TestFiles/OCR/TestSet";
|
||||
List<File> foundFiles = Files.walk(Path.of(dir))
|
||||
.sorted(Comparator.comparingLong(this::getFileSize))
|
||||
.map(Path::toFile)
|
||||
@ -97,6 +102,8 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
@SneakyThrows
|
||||
private String testOCR(File file) {
|
||||
|
||||
MDC.put("fileId", "test");
|
||||
|
||||
Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve("OCR_TEST").resolve(file.toPath().getFileName());
|
||||
|
||||
assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs();
|
||||
@ -108,7 +115,8 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
|
||||
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", FEATURES, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
|
||||
MDC.remove("fileId");
|
||||
System.out.println("File:" + documentFile);
|
||||
System.out.println("\n\n");
|
||||
try (var fileStream = new FileInputStream(documentFile.toFile())) {
|
||||
|
||||
@ -28,7 +28,7 @@ if [ -z "$1" ]; then
|
||||
fi
|
||||
|
||||
namespace=${1}
|
||||
deployment_name="ocr-service-v1"
|
||||
deployment_name="azure-ocr-service"
|
||||
|
||||
echo "deploying to ${namespace}"
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user