Merge branch 'RED-10127' into 'main'

RED-10127: improve headline detection by fitting BBoxes tightly and therefore...

See merge request fforesight/azure-ocr-service!17
This commit is contained in:
Kilian Schüttler 2024-10-22 17:03:29 +02:00
commit 6845afb1dd
41 changed files with 1485 additions and 413 deletions

View File

@ -0,0 +1,11 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
/**
 * Per-request feature toggles for the Azure OCR service, carried on
 * {@code DocumentRequest.features} and consulted throughout the pipeline.
 */
public enum AzureOcrFeature {
// Rotate pages after OCR using the per-page angles from the OCR result
// (see OCRService -> RotationCorrectionUtility.rotatePages).
ROTATION_CORRECTION,
// Intelligent document processing: selects Azure's "prebuilt-layout" model and
// requests key-value pair extraction (see AzureOcrResource.getModelId/buildFeatures).
IDP,
// Presumably gates bold/font-style detection via the image pipeline (formerly the
// OcrServiceSettings.fontStyleDetection flag) — wiring not visible in this change; confirm.
FONT_STYLE_DETECTION,
// OCR every page, not only pages that contain images
// (see BatchFactory.buildBatches / ImageDetectionService.findPagesToProcess).
ALL_PAGES,
// Strip watermarks from the document before running OCR (see OCRService.runOcrOnDocument).
REMOVE_WATERMARKS
}

View File

@ -1,6 +1,8 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.Collections;
import java.util.Optional;
import java.util.Set;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -18,12 +20,13 @@ public class DocumentRequest {
String dossierId;
String fileId;
boolean removeWatermark;
String originDocumentId;
String viewerDocId;
String idpResultId;
boolean removeWatermark;
Set<AzureOcrFeature> features;
public DocumentRequest(String dossierId, String fileId) {
@ -33,18 +36,23 @@ public class DocumentRequest {
originDocumentId = null;
viewerDocId = null;
idpResultId = null;
removeWatermark = false;
features = Collections.emptySet();
}
// needed for backwards compatibility
public DocumentRequest(String dossierId, String fileId, boolean removeWatermark) {
this.dossierId = dossierId;
this.fileId = fileId;
this.removeWatermark = removeWatermark;
originDocumentId = null;
viewerDocId = null;
idpResultId = null;
if (removeWatermark) {
features = Set.of(AzureOcrFeature.REMOVE_WATERMARKS);
} else {
features = Collections.emptySet();
}
}

View File

@ -9,19 +9,61 @@ import java.util.stream.Stream;
public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
/*
B _____ C
| |
A|_____|D
*/
public enum Direction {
RIGHT,
/*
B _____ C
| |
A|_____|D
*/
DOWN,
/*
* A _____ B
* | |
* D|_____|C
*/
LEFT,
/*
* D _____ A
* | |
* C|_____|B
* */
UP,
/*
* C _____ D
* | |
* B|_____|A
*/
NONE
/*
* ? _____ ?
* | |
* ?|_____|?
*/
}
private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) {
return new QuadPoint(new Point2D.Double(rectangle2D.getX(), rectangle2D.getY()),
new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY()),
new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY()),
new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY()));
return fromRectangle2D(rectangle2D, Direction.NONE);
}
/**
 * Builds a QuadPoint from an axis-aligned rectangle, assigning the corners A-D
 * according to the reading {@code direction} (RIGHT and NONE share the default layout).
 */
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D, Direction direction) {
    var bottomLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getY());
    var topLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY());
    var topRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY());
    var bottomRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY());
    switch (direction) {
        case DOWN:
            return new QuadPoint(topLeft, topRight, bottomRight, bottomLeft);
        case LEFT:
            return new QuadPoint(topRight, bottomRight, bottomLeft, topLeft);
        case UP:
            return new QuadPoint(bottomRight, bottomLeft, topLeft, topRight);
        default: // RIGHT and NONE
            return new QuadPoint(bottomLeft, topLeft, topRight, bottomRight);
    }
}
@ -56,6 +98,35 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
/** True when either the A-D or the B-C edge is within THRESHOLD_ANGLE of level. */
public boolean isHorizontal() {
    double bottomEdgeTilt = Math.abs(calculateAngle(a, d));
    double topEdgeTilt = Math.abs(calculateAngle(b, c));
    return bottomEdgeTilt <= THRESHOLD_ANGLE || topEdgeTilt <= THRESHOLD_ANGLE;
}
/** True when either the A-D or the B-C edge deviates from a right angle by at most THRESHOLD_ANGLE. */
public boolean isVertical() {
    double rightAngle = Math.PI / 2;
    double deviationAD = Math.abs(rightAngle - Math.abs(calculateAngle(a, d)));
    double deviationBC = Math.abs(rightAngle - Math.abs(calculateAngle(b, c)));
    return deviationAD <= THRESHOLD_ANGLE || deviationBC <= THRESHOLD_ANGLE;
}
/**
 * Classifies the quad's reading direction from its corner positions.
 * Horizontal takes precedence over vertical; askew quads map to NONE.
 */
public Direction getDirection() {
    if (isHorizontal()) {
        return a.getX() < d.getX() ? Direction.RIGHT : Direction.LEFT;
    }
    if (!isVertical()) {
        return Direction.NONE;
    }
    return a.getY() < d.getY() ? Direction.UP : Direction.DOWN;
}
public Stream<Line2D> asLines() {
return Stream.of(new Line2D.Double(a(), b()), new Line2D.Double(b(), c()), new Line2D.Double(c(), d()), new Line2D.Double(d(), a()));
@ -63,7 +134,7 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
public QuadPointData data() {
public QuadPointData toData() {
return new QuadPointData(new float[]{(float) a.getX(), (float) a.getY(), (float) b.getX(), (float) b.getY(), (float) c.getX(), (float) c.getY(), (float) d.getX(), (float) d.getY()});
}
@ -134,13 +205,19 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
public double size() {
public double getRectangularSize() {
return a().distance(b()) * a().distance(d());
}
public double angle() {
public double getAngle() {
return calculateAngle(a, d);
}
private static double calculateAngle(Point2D a, Point2D d) {
double deltaY = d.getY() - a.getY();
double deltaX = d.getX() - a.getX();

View File

@ -17,10 +17,8 @@ public class OcrServiceSettings {
int batchSize = 128;
boolean debug; // writes the ocr layer visibly to the viewer doc pdf
boolean idpEnabled; // Enables table detection, paragraph classification, section detection, key-value detection.
boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
boolean processAllPages; // if this parameter is set, ocr will be performed on any page, regardless if it has images or not
boolean fontStyleDetection; // Enables bold detection using ghostscript and leptonica
boolean snuggify = true; // Shrinks word bounding boxes to fit the rendered glyphs tightly (see BBoxSnuggificationService); previous comment was copy-pasted from fontStyleDetection
String contentFormat; // Either markdown or text. But, for whatever reason, with markdown enabled, key-values are not written by azure....
}

View File

@ -7,6 +7,7 @@ import com.pdftron.pdf.PDFNet;
import com.sun.jna.NativeLibrary;
import jakarta.annotation.PostConstruct;
import lombok.AllArgsConstructor;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@ -14,6 +15,7 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@Component
@RequiredArgsConstructor
@AllArgsConstructor
public class NativeLibrariesInitializer {
@Value("${pdftron.license:}")

View File

@ -2,19 +2,32 @@ package com.knecon.fforesight.service.ocr.processor.model;
import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
import java.util.ArrayList;
import java.nio.file.Path;
import java.util.List;
import java.util.function.Consumer;
import com.azure.core.util.BinaryData;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public final class PageBatch implements Comparable<PageBatch> {
@Getter
int index;
@NonNull
List<Integer> lookup = new ArrayList<>();
List<Integer> lookup;
@NonNull
@Getter
Path batchDoc;
@NonNull
@Getter
Path imagePipelineDir;
@Override
@ -34,12 +47,6 @@ public final class PageBatch implements Comparable<PageBatch> {
}
public void add(Integer pageNumber) {
lookup.add(pageNumber);
}
public void forEach(Consumer<? super Integer> consumer) {
lookup.forEach(consumer);
@ -84,4 +91,10 @@ public final class PageBatch implements Comparable<PageBatch> {
return Integer.compare(lookup.get(0), o.lookup.get(0));
}
public BinaryData render() {
return BinaryData.fromFile(batchDoc);
}
}

View File

@ -3,7 +3,6 @@ package com.knecon.fforesight.service.ocr.processor.model;
import java.awt.geom.AffineTransform;
import java.awt.geom.Point2D;
import com.azure.ai.documentintelligence.models.DocumentWord;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetrics;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
@ -20,7 +19,8 @@ public class TextPositionInImage {
final QuadPoint position;
final String text;
final AffineTransform imageCTM;
final AffineTransform resultToPageTransform;
final boolean snugBBox;
@Setter
boolean overlapsIgnoreZone;
@ -30,33 +30,34 @@ public class TextPositionInImage {
FontStyle fontStyle;
public TextPositionInImage(DocumentWord word, AffineTransform imageCTM, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle) {
public TextPositionInImage(QuadPoint position, String text, AffineTransform resultToPageTransform, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle, boolean snugBBox) {
this.position = QuadPoint.fromPolygons(word.getPolygon());
this.text = word.getContent();
this.imageCTM = imageCTM;
this.position = position;
this.text = text;
this.resultToPageTransform = resultToPageTransform;
this.fontMetricsProvider = fontMetricsProvider;
this.fontStyle = fontStyle;
this.snugBBox = snugBBox;
}
public QuadPoint getTransformedTextBBox() {
return position.getTransformed(imageCTM);
return position.getTransformed(resultToPageTransform);
}
public AffineTransform getTextMatrix() {
FontMetrics metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
FontMetrics metrics = getMetrics();
// Matrix multiplication is from right to left:
// convert to image coords -> subtract descent -> scale height -> reverse imageCTM scaling -> translate to coordinates in image -> convert to pdf coords
// width must not be set, since it is scaled with the fontsize attribute
double rotation = position.angle();
double rotation = position.getAngle();
Point2D anchor = new Point2D.Double(position.b().getX(), position.b().getY());
AffineTransform ctm = new AffineTransform();
ctm.concatenate(imageCTM);
ctm.concatenate(resultToPageTransform);
ctm.translate(anchor.getX(), anchor.getY());
ctm.scale(getWidth() / getTransformedWidth(),
getHeight() / getTransformedHeight()); // scale with transformation coefficient, such that fontsize may be set with transformed width.
@ -69,6 +70,15 @@ public class TextPositionInImage {
}
/**
 * Picks the font metrics matching this word's bounding box flavor: glyph-tight
 * ("snug") boxes use the tight-BBox metrics, otherwise Azure's native-BBox metrics.
 */
private FontMetrics getMetrics() {
    return snugBBox
            ? fontMetricsProvider.calculateMetricsForTightBBox(text, getTransformedWidth(), getTransformedHeight())
            : fontMetricsProvider.calculateMetricsForAzureBBox(text, getTransformedWidth(), getTransformedHeight());
}
public double getFontSize() {
// The fontsize as estimated by the word width
return fontMetricsProvider.calculateFontSize(text, getTransformedWidth());
@ -95,7 +105,7 @@ public class TextPositionInImage {
public double getFontSizeByHeight() {
// The fontsize as estimated by the word height, only used for font style detection
var metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
var metrics = getMetrics();
return fontMetricsProvider.calculateFontSize(text, getTransformedWidth()) * metrics.getHeightScaling();
}
@ -108,25 +118,25 @@ public class TextPositionInImage {
public Point2D transformedA() {
return imageCTM.transform(position.a(), null);
return resultToPageTransform.transform(position.a(), null);
}
public Point2D transformedB() {
return imageCTM.transform(position.b(), null);
return resultToPageTransform.transform(position.b(), null);
}
public Point2D transformedC() {
return imageCTM.transform(position.c(), null);
return resultToPageTransform.transform(position.c(), null);
}
public Point2D transformedD() {
return imageCTM.transform(position.d(), null);
return resultToPageTransform.transform(position.d(), null);
}
}

View File

@ -1,28 +1,27 @@
package com.knecon.fforesight.service.ocr.processor.service;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.MDC;
import org.springframework.stereotype.Service;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.core.util.BinaryData;
import com.azure.core.util.polling.LongRunningOperationStatus;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.LayerFactory;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Optimizer;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.sdf.SDFDoc;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import reactor.core.publisher.Mono;
@ -35,16 +34,13 @@ public class AsyncOcrService {
AzureOcrResource azureOcrResource;
OcrServiceSettings settings;
ImageProcessingPipeline imageProcessingPipeline;
ObjectMapper mapper;
public OcrResult awaitOcr(PDFDoc pdfDoc,
OcrExecutionSupervisor supervisor,
Set<Integer> pagesWithImages,
ImageProcessingSupervisor imageSupervisor) throws InterruptedException, PDFNetException {
public OcrResult awaitOcr(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, List<PageBatch> batches) throws InterruptedException {
LayerFactory layerFactory = new LayerFactory(settings, supervisor, imageSupervisor, PageInformation.fromPDFDoc(pdfDoc));
List<PageBatch> batches = splitIntoBatches(pdfDoc, supervisor, pagesWithImages);
LayerFactory layerFactory = new LayerFactory(settings, features, supervisor, PageInformation.fromPDFDoc(pdfDoc), imageProcessingPipeline);
for (PageBatch batch : batches) {
@ -57,12 +53,10 @@ public class AsyncOcrService {
supervisor.requireNoErrors();
batchContext.batchStats().start();
BinaryData data = renderBatch(pdfDoc, batch);
BinaryData data = batch.render();
batchContext.batchStats().batchRenderFinished();
beginAnalysis(data, batchContext);
beginAnalysis(data, batchContext, features);
}
supervisor.awaitAllPagesProcessed();
@ -71,45 +65,17 @@ public class AsyncOcrService {
}
private static BinaryData renderBatch(PDFDoc pdfDoc, PageBatch batch) throws PDFNetException {
BinaryData docData;
try (var smallerDoc = extractBatchDocument(pdfDoc, batch)) {
Optimizer.optimize(smallerDoc);
docData = BinaryData.fromBytes(smallerDoc.save(SDFDoc.SaveMode.LINEARIZED, null));
}
return docData;
}
private List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<Integer> pagesWithImages) throws PDFNetException {
List<PageBatch> batches = new ArrayList<>();
PageBatch currentBatch = new PageBatch();
batches.add(currentBatch);
for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) {
if (!settings.isProcessAllPages() && !pagesWithImages.contains(pageNumber)) {
supervisor.logPageSkipped(pageNumber);
continue;
}
currentBatch.add(pageNumber);
if (currentBatch.size() == settings.getBatchSize()) {
currentBatch = new PageBatch();
batches.add(currentBatch);
}
}
return batches;
}
private void beginAnalysis(BinaryData data, BatchContext batchContext) throws InterruptedException {
private void beginAnalysis(BinaryData data, BatchContext batchContext, Set<AzureOcrFeature> features) throws InterruptedException {
batchContext.supervisor.enterConcurrency(batchContext.batch);
batchContext.supervisor.logUploadStart(batchContext.batch, data.getLength());
azureOcrResource.callAzureAsync(data)
var mdcContext = MDC.getCopyOfContextMap();
azureOcrResource.callAzureAsync(data, features)
.flatMap(response -> {
MDC.setContextMap(mdcContext);
if (response.getStatus().equals(LongRunningOperationStatus.IN_PROGRESS)) {
batchContext.supervisor.logInProgress(batchContext.batch);
}
@ -128,6 +94,7 @@ public class AsyncOcrService {
/** Logs batch completion and releases this batch's concurrency slot on the supervisor. */
private static void handleCompleted(BatchContext ctx) {
    log.info("Completed : {}", ctx.batch);
    ctx.supervisor.leaveConcurrency(ctx.batch);
}
@ -141,32 +108,17 @@ public class AsyncOcrService {
private void handleSuccessful(AnalyzeResult finalResult, BatchContext batchContext) {
try {
batchContext.layerFactory.addAnalyzeResult(batchContext.batch, finalResult);
batchContext.supervisor.logPageSuccess(batchContext.batch);
batchContext.supervisor.logPageSuccess(batchContext.batch());
if (settings.isDebug()) {
mapper.writeValue(batchContext.batch().getImagePipelineDir().resolve("azure_result_%d.json".formatted(batchContext.batch().getIndex())).toFile(), finalResult);
}
batchContext.layerFactory.processAnalyzeResult(batchContext.batch(), finalResult);
} catch (Exception e) {
handleError(e, batchContext);
}
}
private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, PageBatch pageBatch) throws PDFNetException {
if (pageBatch.size() < 0) {
throw new IllegalArgumentException();
}
PDFDoc singlePagePdfDoc = new PDFDoc();
pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, singlePagePdfDoc));
return singlePagePdfDoc;
}
@SneakyThrows
private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc singlePagePdfDoc) {
singlePagePdfDoc.pagePushBack(pdfDoc.getPage(pageNumber));
}
private record BatchContext(LayerFactory layerFactory, OcrExecutionSupervisor supervisor, PageBatch batch) {
BatchStats batchStats() {

View File

@ -2,9 +2,11 @@ package com.knecon.fforesight.service.ocr.processor.service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.yaml.snakeyaml.events.Event;
import com.azure.ai.documentintelligence.DocumentIntelligenceAsyncClient;
import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
@ -19,6 +21,7 @@ import com.azure.core.util.BinaryData;
import com.azure.core.util.polling.PollerFlux;
import com.google.common.base.Objects;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import lombok.AccessLevel;
import lombok.SneakyThrows;
@ -42,11 +45,18 @@ public class AzureOcrResource {
@SneakyThrows
public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data) {
public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) {
AnalyzeDocumentRequest analyzeRequest = new AnalyzeDocumentRequest().setBase64Source(data.toBytes());
return asyncClient.beginAnalyzeDocument(getModelId(), null, null, StringIndexType.UTF16CODE_UNIT, buildFeatures(), null, buildContentFormat(), analyzeRequest);
return asyncClient.beginAnalyzeDocument(getModelId(features),
null,
null,
StringIndexType.UTF16CODE_UNIT,
buildFeatures(features),
null,
buildContentFormat(),
analyzeRequest);
}
@ -60,25 +70,25 @@ public class AzureOcrResource {
}
private String getModelId() {
private String getModelId(Set<AzureOcrFeature> features) {
if (settings.isIdpEnabled()) {
if (features.contains(AzureOcrFeature.IDP)) {
return "prebuilt-layout";
}
return "prebuilt-read";
}
private List<DocumentAnalysisFeature> buildFeatures() {
private List<DocumentAnalysisFeature> buildFeatures(Set<AzureOcrFeature> features) {
var features = new ArrayList<DocumentAnalysisFeature>();
var azureFeatures = new ArrayList<DocumentAnalysisFeature>();
if (settings.isIdpEnabled()) {
features.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
if (features.contains(AzureOcrFeature.IDP)) {
azureFeatures.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
}
features.add(DocumentAnalysisFeature.BARCODES);
azureFeatures.add(DocumentAnalysisFeature.BARCODES);
return features;
return azureFeatures;
}
}

View File

@ -0,0 +1,107 @@
package com.knecon.fforesight.service.ocr.processor.service;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Optimizer;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.sdf.SDFDoc;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class BatchFactory {

    OcrServiceSettings settings;

    /** Canonical filename for the standalone PDF holding the pages of batch {@code number}. */
    public static String formatBatchFilename(int number) {
        return "batch_%d.pdf".formatted(number);
    }

    /**
     * Detects which pages of {@code pdfDoc} need OCR and groups them into batches.
     *
     * @param pdfDoc     source document
     * @param supervisor receives progress and page-skip notifications
     * @param features   enabled OCR features; {@code ALL_PAGES} forces every page to be processed
     * @param fileDir    directory the per-batch PDFs are written to
     * @return batches of at most {@code settings.getBatchSize()} pages; empty if no page qualifies
     */
    @SneakyThrows
    public List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, Path fileDir) {
        Set<Integer> pagesWithImages = ImageDetectionService.findPagesToProcess(pdfDoc, features);
        supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesWithImages.size());
        return buildBatches(pdfDoc, supervisor, features, fileDir, pagesWithImages);
    }

    /**
     * Groups the qualifying pages into batches of at most {@code settings.getBatchSize()} pages,
     * materializing each batch as its own optimized PDF in {@code fileDir}. Pages without images
     * are skipped (and reported to the supervisor) unless {@code ALL_PAGES} is enabled.
     */
    public List<PageBatch> buildBatches(PDFDoc pdfDoc,
                                        OcrExecutionSupervisor supervisor,
                                        Set<AzureOcrFeature> features,
                                        Path fileDir,
                                        Set<Integer> pagesWithImages) throws PDFNetException {
        List<PageBatch> batches = new ArrayList<>();
        List<Integer> numbersForCurrentBatch = new ArrayList<>();
        for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) {
            if (!features.contains(AzureOcrFeature.ALL_PAGES) && !pagesWithImages.contains(pageNumber)) {
                supervisor.logPageSkipped(pageNumber);
                continue;
            }
            numbersForCurrentBatch.add(pageNumber);
            if (numbersForCurrentBatch.size() == settings.getBatchSize()) {
                batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, fileDir));
                numbersForCurrentBatch = new ArrayList<>();
            }
        }
        // Flush the trailing, partially filled batch (if any pages remain).
        if (!numbersForCurrentBatch.isEmpty()) {
            batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, fileDir));
        }
        return batches;
    }

    /**
     * Extracts {@code pageNumbers} from {@code pdfDoc} into a new optimized, linearized PDF at
     * {@code fileDir/batch_<number>.pdf} and wraps it in a {@link PageBatch}.
     *
     * @throws IllegalArgumentException if {@code pageNumbers} is empty
     */
    @SneakyThrows
    public static PageBatch create(int number, PDFDoc pdfDoc, List<Integer> pageNumbers, Path fileDir) {
        if (pageNumbers.isEmpty()) {
            throw new IllegalArgumentException("pageNumbers must not be empty");
        }
        Path batchDocPath = fileDir.resolve(formatBatchFilename(number));
        try (var batchDoc = extractBatchDocument(pdfDoc, pageNumbers)) {
            Optimizer.optimize(batchDoc);
            // Path.toString() yields the path directly; the former toFile().toString()
            // round-trip was redundant (and toFile() can fail on non-default filesystems).
            batchDoc.save(batchDocPath.toString(), SDFDoc.SaveMode.LINEARIZED, null);
        }
        return new PageBatch(number, pageNumbers, batchDocPath, fileDir);
    }

    /** Copies the given pages of {@code pdfDoc} into a fresh document; the caller owns (and must close) it. */
    private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, List<Integer> pageBatch) throws PDFNetException {
        if (pageBatch.isEmpty()) {
            throw new IllegalArgumentException("pageBatch must not be empty");
        }
        PDFDoc batchDoc = new PDFDoc();
        pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, batchDoc));
        return batchDoc;
    }

    /** Appends a single page; @SneakyThrows keeps the forEach lambda free of checked exceptions. */
    @SneakyThrows
    private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc batchDoc) {
        batchDoc.pagePushBack(pdfDoc.getPage(pageNumber));
    }
}

View File

@ -7,28 +7,26 @@ import java.util.Set;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.PDFDoc;
import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
@Service
@UtilityClass
public class ImageDetectionService {
// any image with smaller height and width than this gets thrown out, see everyPointInDashedLineIsImage.pdf
private static final int PIXEL_THRESHOLD = 0;
private final OcrServiceSettings ocrServiceSettings;
public ImageDetectionService(OcrServiceSettings ocrServiceSettings) {this.ocrServiceSettings = ocrServiceSettings;}
@SneakyThrows
public Set<Integer> findPagesToProcess(PDFDoc pdfDoc) {
public Set<Integer> findPagesToProcess(PDFDoc pdfDoc, Set<AzureOcrFeature> features) {
if (ocrServiceSettings.isProcessAllPages()) {
if (features.contains(AzureOcrFeature.ALL_PAGES)) {
Set<Integer> pages = new HashSet<>();
for (int i = 1; i <= pdfDoc.getPageCount(); i++) {
pages.add(i);

View File

@ -8,6 +8,7 @@ import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Set;
import org.springframework.stereotype.Service;
@ -16,10 +17,11 @@ import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.pdftronlogic.commons.OCGWatermarkRemovalService;
import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.Statistics;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
import com.pdftron.pdf.PDFDoc;
@ -36,14 +38,15 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class OCRService {
public static final String IMAGE_PIPELINE_DIR = "image_pipeline";
public static final String AZURE_OUTPUT_DIR = "azure_output";
IOcrMessageSender ocrMessageSender;
WatermarkRemovalService watermarkRemovalService;
InvisibleElementRemovalService invisibleElementRemovalService;
PDFTronViewerDocumentService viewerDocumentService;
ImageDetectionService imageDetectionService;
BatchFactory batchFactory;
AsyncOcrService asyncOcrService;
OcrServiceSettings settings;
ImageProcessingPipeline imageProcessingPipeline;
/**
@ -59,21 +62,21 @@ public class OCRService {
* @param analyzeResultFile result file with additional information
*/
@Observed(name = "OCRService", contextualName = "run-ocr-on-document")
public void runOcrOnDocument(String dossierId, String fileId, boolean removeWatermark, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
if (removeWatermark) {
if (features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) {
removeWatermark(documentFile);
}
removeInvisibleElements(documentFile);
log.info("Starting OCR for file {}", fileId);
log.info("Starting OCR");
long ocrStart = System.currentTimeMillis();
Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile).getStatistics();
Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile, features).getStatistics();
long ocrEnd = System.currentTimeMillis();
log.info("ocr successful for file with dossierId {} and fileId {}, took {}", dossierId, fileId, humanizeDuration(ocrEnd - ocrStart));
log.info("OCR successful, took {}", humanizeDuration(ocrEnd - ocrStart));
if (settings.isDebug()) {
logRuntimeBreakdown(ocrEnd, ocrStart, stats);
@ -117,10 +120,16 @@ public class OCRService {
@SneakyThrows
public OcrExecutionSupervisor runOcr(Path tmpDir, File documentFile, File viewerDocumentFile, String fileId, String dossierId, File analyzeResultFile) {
public OcrExecutionSupervisor runOcr(Path tmpDir,
File documentFile,
File viewerDocumentFile,
String fileId,
String dossierId,
File analyzeResultFile,
Set<AzureOcrFeature> features) {
Path tmpImageDir = tmpDir.resolve("images");
Path azureOutputDir = tmpDir.resolve("azure_output");
Path tmpImageDir = tmpDir.resolve(IMAGE_PIPELINE_DIR);
Path azureOutputDir = tmpDir.resolve(AZURE_OUTPUT_DIR);
Files.createDirectories(azureOutputDir);
Files.createDirectories(tmpImageDir);
@ -132,19 +141,18 @@ public class OCRService {
OcrExecutionSupervisor supervisor = new OcrExecutionSupervisor(pdfDoc.getPageCount(), ocrMessageSender, fileId, settings);
supervisor.getStatistics().setStart();
Set<Integer> pagesWithImages = imageDetectionService.findPagesToProcess(pdfDoc);
ImageProcessingSupervisor imageSupervisor = null;
if (settings.isFontStyleDetection()) {
imageSupervisor = imageProcessingPipeline.run(pagesWithImages, tmpImageDir, documentFile);
}
List<PageBatch> batches = batchFactory.splitIntoBatches(pdfDoc, supervisor, features, tmpImageDir);
supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesWithImages.size());
OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, pagesWithImages, imageSupervisor);
OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, features, batches);
viewerDocumentService.addLayerGroups(documentFile, documentFile, ocrResult.regularLayers());
viewerDocumentService.addLayerGroups(documentFile, viewerDocumentFile, ocrResult.debugLayers());
if (features.contains(AzureOcrFeature.ROTATION_CORRECTION)) {
RotationCorrectionUtility.rotatePages(documentFile.toPath(), documentFile.toPath(), ocrResult.anglesPerPage());
RotationCorrectionUtility.rotatePages(viewerDocumentFile.toPath(), viewerDocumentFile.toPath(), ocrResult.anglesPerPage());
}
supervisor.getStatistics().drawingPdfFinished();
supervisor.sendFinished();

View File

@ -92,10 +92,10 @@ public class OcrExecutionSupervisor {
}
public void finishMappingResult(PageBatch pageRange) {
public void finishMappingResult(PageBatch batch) {
pageRange.forEach(pageIndex -> countDownPagesToProcess.countDown());
statistics.getBatchStats(pageRange).finishWritingText();
batch.forEach(pageIndex -> countDownPagesToProcess.countDown());
statistics.getBatchStats(batch).finishWritingText();
ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount());
}

View File

@ -0,0 +1,179 @@
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
import java.nio.IntBuffer;
import java.util.Optional;
import com.azure.ai.documentintelligence.models.DocumentPage;
import com.azure.ai.documentintelligence.models.DocumentWord;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.sun.jna.Pointer;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Numa;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
/**
* This class attempts to shrink the BBox of a word to match the exact height of the word. This is only attempted for horizontal or vertical words. Any askew text is left as is.
*/
@Slf4j
public class BBoxSnuggificationService {
public static final int PIXEL_COUNT_THRESHOLD = 2; // minimum active pixel count per row for shrinking to stop
private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // Skips snuggification, if the average remaining word rotation of a word, written from left-to-right is bigger than this
public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for word, if the remaining rotation is larger than this angle
private enum Operation {
HORIZONTAL,
VERTICAL,
NONE
}
/**
 * Attempts to shrink the word's bounding box to fit the rendered glyphs tightly.
 * Only horizontal words get their height shrunk and vertical words their width;
 * anything askew (Operation.NONE) is left untouched.
 *
 * @param pageImage                rasterized page (Leptonica Pix); may be null if no raster exists
 * @param origin                   the Azure OCR word whose polygon is to be tightened
 * @param resultToImageTransform   maps OCR-result coordinates into page-image pixel coordinates
 * @return the tightened quad in OCR-result coordinates, or empty when shrinking was not possible
 */
@SneakyThrows
public static Optional<QuadPoint> snuggify(Pix pageImage, DocumentWord origin, AffineTransform resultToImageTransform) {
// No page raster available -> nothing to fit against.
if (pageImage == null) {
return Optional.empty();
}
// Word polygon expressed in page-image pixel coordinates.
QuadPoint originTransformed = QuadPoint.fromPolygons(origin.getPolygon()).getTransformed(resultToImageTransform);
// Residual rotation of the word — presumably the angle left after removing 90-degree steps;
// confirm against RotationCorrectionUtility.getRemainingAngle.
double remainingAngle = Math.abs(RotationCorrectionUtility.getRemainingAngle(originTransformed.getAngle()));
QuadPoint.Direction direction = originTransformed.getDirection();
Operation operation = determineOperation(origin, direction, remainingAngle, originTransformed);
if (operation == Operation.NONE) {
return Optional.empty();
}
// Crop the word region out of the page raster; can fail when the box lies outside the image.
Pix wordImage = WritableOcrResultFactory.extractWordImage(originTransformed, pageImage);
if (wordImage == null) {
log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
return Optional.empty();
}
// HORIZONTAL shrinks along Y (height), VERTICAL along X (width).
Optional<Rectangle2D> snugBox = switch (operation) {
case HORIZONTAL -> snuggifyY(wordImage, originTransformed.getBounds2D());
case VERTICAL -> snuggifyX(wordImage, originTransformed.getBounds2D());
default -> Optional.empty();
};
// Native Leptonica memory — must be freed explicitly.
LeptUtils.disposePix(wordImage);
// Map the tightened box back from image pixels into OCR-result coordinates.
AffineTransform imageToResultTransform = resultToImageTransform.createInverse();
return snugBox.map(snugBBox -> QuadPoint.fromRectangle2D(snugBBox, direction))
.map(bbox -> bbox.getTransformed(imageToResultTransform));
}
private static Operation determineOperation(DocumentWord origin, QuadPoint.Direction direction, double remainingAngle, QuadPoint originTransformed) {
Operation operation = Operation.NONE;
if (((direction.equals(QuadPoint.Direction.RIGHT) || direction.equals(QuadPoint.Direction.LEFT)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD) //
|| (origin.getContent().length() < 4 || Math.abs(originTransformed.getAngle()) < AVERAGE_ANGLE_THRESHOLD * 3)) {
operation = Operation.HORIZONTAL;
} else if ((direction.equals(QuadPoint.Direction.UP) || direction.equals(QuadPoint.Direction.DOWN)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD) {
operation = Operation.VERTICAL;
}
return operation;
}
private static Optional<Rectangle2D> snuggifyX(Pix wordImage, Rectangle2D origin) {
Numa colCounts = Leptonica1.pixCountPixelsByColumn(wordImage);
int start = 0;
int end = wordImage.w - PIXEL_COUNT_THRESHOLD;
for (int i = start; i < Math.min(wordImage.w, 25); i++) {
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
start = i;
break;
}
}
for (int i = end; i > Math.max(0, wordImage.w - 25); i--) {
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
end = i;
break;
}
}
if (start == 0 && end == wordImage.w) {
return Optional.empty();
}
return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (wordImage.w - end), origin.getHeight()));
}
private static Optional<Rectangle2D> snuggifyY(Pix wordImage, Rectangle2D origin) {
int start = 0;
int end = wordImage.h - 1;
for (int i = start; i < Math.min(wordImage.h, 25); i++) {
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
start = i;
break;
}
}
for (int i = end; i > Math.max(0, wordImage.h - 25); i--) {
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
end = i;
break;
}
}
if (start == 0 && end == wordImage.h) {
return Optional.empty();
}
return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (wordImage.h - end)));
}
private static int pixCountPerRow(int row, Pix pix) {
IntBuffer result = IntBuffer.allocate(1);
int success = Leptonica1.pixCountPixelsInRow(pix, row, result, null);
if (success == 0) {
return result.get();
} else {
return -1;
}
}
private static int pixCountPerColumn(int column, Numa colCounts) {
if (column > colCounts.n) {
throw new IndexOutOfBoundsException("column " + column + " is out of bounds for column count " + colCounts.n);
}
Pointer pointer = colCounts.array.getPointer();
// Read the float value at position i. Each float takes 4 bytes.
return (int) pointer.getFloat((long) column * Float.BYTES);
}
public static boolean canBeSnuggified(DocumentPage resultPage, AffineTransform imageTransform) {
double averageAngle = resultPage.getWords()
.stream()
.filter(word -> word.getContent().length() >= 4)
.map(DocumentWord::getPolygon)
.map(QuadPoint::fromPolygons)
.map(qp -> qp.getTransformed(imageTransform))
.filter(qp -> qp.getDirection().equals(QuadPoint.Direction.RIGHT))
.mapToDouble(QuadPoint::getAngle)
.map(Math::toDegrees)
.map(RotationCorrectionUtility::getRemainingAngle).average()
.orElse(Double.MAX_VALUE);
return Math.abs(averageAngle) < AVERAGE_ANGLE_THRESHOLD;
}
}

View File

@ -84,6 +84,7 @@ public class FontStyleDetector implements Closeable {
wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.BOLD_INSTANCE);
wordImage.textPosition().setFontStyle(FontStyle.BOLD);
} else {
wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.REGULAR_INSTANCE);
wordImage.textPosition().setFontStyle(FontStyle.REGULAR);
}
}

View File

@ -1,16 +1,20 @@
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.MDC;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
@ -27,7 +31,7 @@ public class GhostScriptOutputHandler extends Thread {
// If the stdError or stdOut buffer of a thread is not being emptied it might lock the process in case of errors, so we need to empty both streams to prevent a deadlock.
// Since both need to read simultaneously we need to implement the readers as separate threads.
final int batchIdx;
final InputStream is;
final String processName;
final Type type;
@ -36,24 +40,32 @@ public class GhostScriptOutputHandler extends Thread {
final Consumer<ImageFile> outputHandler;
final Consumer<String> errorHandler;
final Map<String, String> parentMdcContext;
int currentPageNumber;
public static GhostScriptOutputHandler stdError(InputStream is, Consumer<String> errorHandler) {
public static GhostScriptOutputHandler stdError(int batchIdx, InputStream is, Consumer<String> errorHandler) {
return new GhostScriptOutputHandler(is, "GS", Type.ERROR, null, null, errorHandler);
return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.ERROR, null, null, errorHandler, MDC.getCopyOfContextMap());
}
public static GhostScriptOutputHandler stdOut(InputStream is, Map<Integer, ImageFile> pagesToProcess, Consumer<ImageFile> imageFileOutput, Consumer<String> errorHandler) {
public static GhostScriptOutputHandler stdOut(int batchIdx,
InputStream is,
Map<Integer, ImageFile> pagesToProcess,
Consumer<ImageFile> imageFileOutput,
Consumer<String> errorHandler) {
return new GhostScriptOutputHandler(is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler);
return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler, MDC.getCopyOfContextMap());
}
@SneakyThrows
public void run() {
MDC.setContextMap(parentMdcContext);
try (InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr)) {
String line;
@ -77,7 +89,9 @@ public class GhostScriptOutputHandler extends Thread {
queueFinishedPage(currentPageNumber);
if (!pagesToProcess.isEmpty()) {
errorHandler.accept(String.format("Ghostscript finished for batch, but pages %s remain unprocessed.", formatPagesToProcess()));
errorHandler.accept(String.format("Ghostscript finished for batch %d, but pages %s remain unprocessed.", batchIdx, formatPagesToProcess()));
} else {
log.info("{}: Batch rendered successfully!", batchIdx);
}
}
@ -86,10 +100,16 @@ public class GhostScriptOutputHandler extends Thread {
private String formatPagesToProcess() {
var pages = new PageBatch();
pagesToProcess.keySet()
.forEach(pages::add);
return pages.toString();
List<String> intervals = formatIntervals(pagesToProcess.keySet()
.stream()
.sorted()
.toList());
if (intervals.size() > 4) {
intervals = intervals.subList(0, 4);
intervals.add("...");
}
return String.join(", ", intervals);
}
@ -106,7 +126,6 @@ public class GhostScriptOutputHandler extends Thread {
currentPageNumber = pageNumber;
return;
}
queueFinishedPage(currentPageNumber);
currentPageNumber = pageNumber;
}
@ -117,10 +136,10 @@ public class GhostScriptOutputHandler extends Thread {
var imageFile = this.pagesToProcess.remove(pageNumber);
if (imageFile == null) {
errorHandler.accept(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet()));
errorHandler.accept(String.format("%d: Page number %d does not exist in this thread. It only has pagenumbers %s", batchIdx, pageNumber, pagesToProcess.keySet()));
} else {
if (!new File(imageFile.absoluteFilePath()).exists()) {
errorHandler.accept(String.format("Rendered page with number %d does not exist!", pageNumber));
errorHandler.accept(String.format("%d: Rendered page with number %d does not exist!", batchIdx, pageNumber));
}
}
outputHandler.accept(imageFile);

View File

@ -2,18 +2,15 @@ package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
@ -25,132 +22,60 @@ import lombok.extern.slf4j.Slf4j;
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 142/144
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 72/74
public class GhostScriptService {
public static final int BATCH_SIZE = 256;
static String FORMAT = ".tiff";
static String DEVICE = "tiffgray";
static int DPI = 300;
static int PROCESS_COUNT = 1;
@SneakyThrows
public void renderPagesBatched(List<Integer> pagesToProcess,
String documentAbsolutePath,
Path tmpImageDir,
ImageProcessingSupervisor supervisor,
Consumer<ImageFile> successHandler,
Consumer<String> errorHandler) {
public void startBatchRender(PageBatch batch, ImageProcessingSupervisor supervisor, Path renderedImageDir, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
List<List<ProcessInfo>> processInfoBatches = buildSubListForEachProcess(pagesToProcess,
PROCESS_COUNT,
BATCH_SIZE
* PROCESS_COUNT); // GS has a limit on how many pageIndices per call are possible, so we limit it to 256 pages per process
for (int batchIdx = 0; batchIdx < processInfoBatches.size(); batchIdx++) {
supervisor.requireNoErrors();
supervisor.requireNoErrors();
List<ProcessInfo> processInfos = processInfoBatches.get(batchIdx);
log.info("Batch {}: Running {} gs processes with ({}) pages each",
batchIdx,
processInfos.size(),
processInfos.stream()
.map(info -> info.pageNumbers().size())
.map(String::valueOf)
.collect(Collectors.joining(", ")));
int finalBatchIdx = batchIdx;
List<Process> processes = processInfos.stream()
.parallel()
.map(info -> buildCmdArgs(info.processIdx(), finalBatchIdx, info.pageNumbers(), tmpImageDir, documentAbsolutePath))
.peek(s -> log.debug(String.join(" ", s.cmdArgs())))
.map(processInfo -> executeProcess(processInfo, successHandler, errorHandler))
.toList();
List<Integer> processExitCodes = new LinkedList<>();
for (Process process : processes) {
processExitCodes.add(process.waitFor());
}
log.info("Batch {}: Ghostscript processes finished with exit codes {}", batchIdx, processExitCodes);
}
}
private List<List<ProcessInfo>> buildSubListForEachProcess(List<Integer> stitchedPageNumbers, int processCount, int batchSize) {
// GhostScript command line can only handle so many page numbers at once, so we split it into batches
int batchCount = (int) Math.ceil((double) stitchedPageNumbers.size() / batchSize);
log.info("Splitting {} page renderings across {} process(es) in {} batch(es) with size {}", stitchedPageNumbers.size(), processCount, batchCount, batchSize);
List<List<ProcessInfo>> processInfoBatches = new ArrayList<>(batchCount);
List<List<List<Integer>>> batchedBalancedSublist = ListSplittingUtils.buildBatchedBalancedSublist(stitchedPageNumbers.stream()
.sorted()
.toList(), processCount, batchCount);
for (var batch : batchedBalancedSublist) {
List<ProcessInfo> processInfos = new ArrayList<>(processCount);
for (int threadIdx = 0; threadIdx < batch.size(); threadIdx++) {
List<Integer> balancedPageNumbersSubList = batch.get(threadIdx);
processInfos.add(new ProcessInfo(threadIdx, balancedPageNumbersSubList));
}
processInfoBatches.add(processInfos);
}
return processInfoBatches;
log.info("Batch {}: starting GhostScript rendering with {} pages", batch, batch.size());
executeProcess(batch.getIndex(), buildCmdArgs(batch, renderedImageDir, batch.getBatchDoc()), successHandler, errorHandler);
}
@SneakyThrows
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(Integer processIdx,
Integer batchIdx,
List<Integer> stitchedImagePageIndices,
Path outputDir,
String documentAbsolutePath) {
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(PageBatch batch, Path outputDir, Path document) {
String imagePathFormat = outputDir.resolve("output_" + processIdx + "_" + batchIdx + ".%04d" + FORMAT).toFile().toString();
String imagePathFormat = outputDir.resolve("output_" + batch.getIndex() + ".%04d" + FORMAT).toFile().toString();
Map<Integer, ImageFile> fullPageImages = new HashMap<>();
for (int i = 0; i < stitchedImagePageIndices.size(); i++) {
Integer pageNumber = stitchedImagePageIndices.get(i);
fullPageImages.put(pageNumber, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
List<Integer> allPageNumbers = batch.getAllPageNumbers();
for (int i = 0; i < allPageNumbers.size(); i++) {
Integer pageNumber = allPageNumbers.get(i);
fullPageImages.put(i + 1, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
}
String[] cmdArgs = buildCmdArgs(stitchedImagePageIndices, documentAbsolutePath, imagePathFormat);
String[] cmdArgs = buildCmdArgs(document, imagePathFormat);
return new ProcessCmdsAndRenderedImageFiles(cmdArgs, fullPageImages);
}
private String[] buildCmdArgs(List<Integer> pageNumbers, String documentAbsolutePath, String imagePathFormat) {
private String[] buildCmdArgs(Path document, String imagePathFormat) {
StringBuilder sPageList = new StringBuilder();
int i = 1;
for (Integer integer : pageNumbers) {
sPageList.append(integer);
if (i < pageNumbers.size()) {
sPageList.append(",");
}
i++;
}
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sPageList=" + sPageList, "-sOutputFile=" + imagePathFormat, documentAbsolutePath, "-c", "quit"};
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sOutputFile=" + imagePathFormat, document.toFile().toString(), "-c", "quit"};
}
@SneakyThrows
private Process executeProcess(ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
private void executeProcess(int batchIdx, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
InputStream stdOut = p.getInputStream();
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batchIdx, stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
InputStream stdError = p.getErrorStream();
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(stdError, errorHandler);
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batchIdx, stdError, errorHandler);
stdOutLogger.start();
stdErrorLogger.start();
return p;
}
@ -158,8 +83,4 @@ public class GhostScriptService {
}
private record ProcessInfo(Integer processIdx, List<Integer> pageNumbers) {
}
}

View File

@ -1,15 +1,14 @@
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
@ -21,29 +20,30 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ImageProcessingPipeline {
public static final String PROCESSED_DIR = "processed";
public static final String RENDERED_DIR = "rendered";
GhostScriptService ghostScriptService;
ImageProcessingService imageProcessingService;
@SneakyThrows
public ImageProcessingSupervisor run(Set<Integer> pageNumberSet, Path imageDir, File document) {
public ImageProcessingSupervisor addToPipeline(PageBatch batch) {
Path processedImageDir = imageDir.resolve("processed");
Path renderedImageDir = imageDir.resolve("rendered");
Path processedImageDir = batch.getImagePipelineDir().resolve(PROCESSED_DIR);
Path renderedImageDir = batch.getImagePipelineDir().resolve(RENDERED_DIR);
Files.createDirectories(renderedImageDir);
Files.createDirectories(processedImageDir);
List<Integer> pageNumbers = pageNumberSet.stream()
.sorted()
.toList();
List<Integer> pageNumbers = batch.getAllPageNumbers();
ImageProcessingSupervisor supervisor = new ImageProcessingSupervisor(pageNumbers);
Consumer<ImageFile> renderingSuccessConsumer = imageFile -> imageProcessingService.addToProcessingQueue(imageFile, processedImageDir, supervisor);
Consumer<String> renderingErrorConsumer = supervisor::markError;
ghostScriptService.renderPagesBatched(pageNumbers, document.toString(), renderedImageDir, supervisor, renderingSuccessConsumer, renderingErrorConsumer);
ghostScriptService.startBatchRender(batch, supervisor, renderedImageDir, renderingSuccessConsumer, renderingErrorConsumer);
return supervisor;
}

View File

@ -38,6 +38,7 @@ public class ImageProcessingService {
try {
process(processParams.unprocessedImage(), processParams.outputDir, processParams.supervisor());
} catch (Exception e) {
processParams.supervisor.markPageFinished(processParams.unprocessedImage());
log.error(e.getMessage(), e);
}
}
@ -54,31 +55,31 @@ public class ImageProcessingService {
}
@SneakyThrows
private void process(ImageFile unprocessedImage, Path outputDir, ImageProcessingSupervisor supervisor) {
supervisor.requireNoErrors();
String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
ImageFile imageFile = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
try {
synchronized (ImageProcessingSupervisor.class) {
// Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
Pix processedPix;
Pix pix = unprocessedImage.readPix();
synchronized (ImageProcessingSupervisor.class) {
// Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
Pix processedPix;
Pix pix = unprocessedImage.readPix();
processedPix = processPix(pix);
Leptonica1.pixWrite(absoluteFilePath, processedPix, ILeptonica.IFF_TIFF_PACKBITS);
String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
LeptUtils.disposePix(pix);
LeptUtils.disposePix(processedPix);
processedPix = processPix(pix);
Leptonica1.pixWrite(absoluteFilePath, processedPix, ILeptonica.IFF_TIFF_PACKBITS);
LeptUtils.disposePix(pix);
LeptUtils.disposePix(processedPix);
ImageFile imageFile = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
}
} catch (Exception e) {
supervisor.markError(e.getMessage());
} finally {
supervisor.markPageFinished(imageFile);
}
}
@SneakyThrows
private Pix processPix(Pix pix) {
Pix binarized;

View File

@ -63,7 +63,7 @@ public class ImageProcessingSupervisor {
private boolean hasErrors() {
return errors.isEmpty();
return !errors.isEmpty();
}
@ -86,7 +86,7 @@ public class ImageProcessingSupervisor {
if (this.errors.isEmpty()) {
return;
}
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors));
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors.subList(0, Math.min(errors.size(), 3))));
}
}

View File

@ -0,0 +1,23 @@
package com.knecon.fforesight.service.ocr.processor.visualizations;
import java.util.function.Function;
import java.util.stream.Stream;
import com.azure.ai.documentintelligence.models.DocumentSpan;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
public class FontStyler {

    /**
     * Bundles the span lookups for the font styles reported by the OCR result:
     * bold, italic and handwritten spans.
     */
    public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {

        /**
         * Creates a Lookups whose three lookups each contain no spans at all.
         */
        public static Lookups empty() {
            return new Lookups(emptyLookup(), emptyLookup(), emptyLookup());
        }

        // Builds one fresh lookup backed by an empty span stream.
        private static SpanLookup<DocumentSpan> emptyLookup() {
            return new SpanLookup<>(Stream.empty(), Function.identity());
        }
    }
}

View File

@ -20,6 +20,7 @@ import lombok.experimental.FieldDefaults;
public final class WritableOcrResult {
int pageNumber;
double angle;
@Builder.Default
List<TextPositionInImage> textPositionInImage = Collections.emptyList();
@Builder.Default

View File

@ -11,6 +11,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -25,54 +27,67 @@ import com.azure.ai.documentintelligence.models.DocumentTableCell;
import com.azure.ai.documentintelligence.models.DocumentWord;
import com.azure.ai.documentintelligence.models.FontWeight;
import com.google.common.base.Functions;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.Type0FontMetricsProvider;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.lept4j.Box;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
@Slf4j
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class WritableOcrResultFactory {
FontMetricsProvider regularFont = Type0FontMetricsProvider.REGULAR_INSTANCE;
FontMetricsProvider boldFont = Type0FontMetricsProvider.BOLD_INSTANCE;
FontMetricsProvider italicFont = Type0FontMetricsProvider.ITALIC_INSTANCE;
FontMetricsProvider boldItalicFont = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
@Getter
Map<Integer, AffineTransform> pageCtms;
Map<Integer, AffineTransform> resultToPageTransforms;
Map<Integer, PageInformation> pageInformation;
ImageProcessingPipeline imageProcessingPipeline;
OcrServiceSettings settings;
ImageProcessingSupervisor imageSupervisor;
Set<AzureOcrFeature> features;
@SneakyThrows
public WritableOcrResultFactory(Map<Integer, PageInformation> pageInformation, OcrServiceSettings settings, ImageProcessingSupervisor imageSupervisor) {
public WritableOcrResultFactory(Map<Integer, PageInformation> pageInformation,
ImageProcessingPipeline imageProcessingPipeline,
OcrServiceSettings settings,
Set<AzureOcrFeature> features) {
this.imageProcessingPipeline = imageProcessingPipeline;
this.pageInformation = pageInformation;
pageCtms = Collections.synchronizedMap(new HashMap<>());
resultToPageTransforms = Collections.synchronizedMap(new HashMap<>());
this.settings = settings;
this.imageSupervisor = imageSupervisor;
this.features = features;
}
public List<WritableOcrResult> buildOcrResultToWrite(AnalyzeResult analyzeResult, PageBatch pageOffset) throws InterruptedException {
public List<WritableOcrResult> buildOcrResultToWrite(AnalyzeResult analyzeResult, PageBatch batch) throws InterruptedException {
Map<Integer, Double> anglesPerPage = analyzeResult.getPages()
.stream()
.collect(Collectors.toMap(DocumentPage::getPageNumber, documentPage -> -documentPage.getAngle()));
RotationCorrectionUtility.rotatePages(batch.getBatchDoc(), batch.getBatchDoc(), anglesPerPage);
ImageProcessingSupervisor imageSupervisor = imageProcessingPipeline.addToPipeline(batch);
List<WritableOcrResult> writableOcrResultList = new ArrayList<>();
@ -80,16 +95,16 @@ public class WritableOcrResultFactory {
for (DocumentPage resultPage : analyzeResult.getPages()) {
PageInformation pageInformation = getPageInformation(getPageNumber(pageOffset, resultPage));
AffineTransform pageCtm = getPageCTM(pageInformation, resultPage.getWidth());
pageCtms.put(getPageNumber(pageOffset, resultPage), pageCtm);
PageInformation pageInformation = getPageInformation(getPageNumber(batch, resultPage));
AffineTransform resultToPageTransform = buildResultToPageTransform(pageInformation, resultPage.getWidth());
resultToPageTransforms.put(getPageNumber(batch, resultPage), resultToPageTransform);
List<TextPositionInImage> words = buildTextPositionsInImage(pageOffset, resultPage, pageCtm, lookups, pageInformation);
List<TextPositionInImage> words = buildTextPositionsInImage(batch, resultPage, resultToPageTransform, lookups, pageInformation, imageSupervisor);
var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words);
var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words).angle(-resultPage.getAngle());
if (settings.isDrawTablesAsLines()) {
builder.tableLines(getTableLines(analyzeResult, pageInformation, pageCtm));
builder.tableLines(getTableLines(analyzeResult, pageInformation, resultToPageTransform));
}
writableOcrResultList.add(builder.build());
@ -101,46 +116,74 @@ public class WritableOcrResultFactory {
private List<TextPositionInImage> buildTextPositionsInImage(PageBatch pageOffset,
DocumentPage resultPage,
AffineTransform pageCtm,
AffineTransform resultToPageTransform,
Lookups lookups,
PageInformation pageInformation) throws InterruptedException {
PageInformation pageInformation,
ImageProcessingSupervisor imageSupervisor) throws InterruptedException {
if (!settings.isFontStyleDetection()) {
return buildText(resultPage, pageCtm, lookups, pageInformation);
if (!settings.isSnuggify() && !features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
}
ImageFile imageFile = imageSupervisor.awaitProcessedPage(getPageNumber(pageOffset, resultPage));
if (imageFile == null) {
return buildText(resultPage, pageCtm, lookups, pageInformation);
return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
}
synchronized (ImageProcessingSupervisor.class) {
return buildTextWithBoldDetection(resultPage, pageCtm, pageInformation, imageFile);
// Leptonica is not thread safe, but is being called in ImageProcessingService as well
if (features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
return buildTextWithBoldDetection(resultPage, resultToPageTransform, pageInformation, imageFile);
}
return buildTextWithSnugBBoxes(resultPage, imageFile, resultToPageTransform, lookups, pageInformation);
}
}
private static List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage, AffineTransform pageCtm, PageInformation pageInformation, ImageFile imageFile) {
// Leptonica is not thread safe, but is being called in ImageProcessingService as well
@SneakyThrows
private List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage,
AffineTransform resultToPageTransform,
PageInformation pageInformation,
ImageFile imageFile) {
Pix pageImage = imageFile.readPix();
List<TextPositionInImage> words = new ArrayList<>();
try (FontStyleDetector fontStyleDetector = new FontStyleDetector()) {
AffineTransform imageTransform = new AffineTransform();
double scalingFactor = pageImage.w / resultPage.getWidth();
imageTransform.scale(scalingFactor, scalingFactor);
AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);
for (DocumentWord word : resultPage.getWords()) {
TextPositionInImage textPosition = new TextPositionInImage(word, pageCtm, Type0FontMetricsProvider.REGULAR_INSTANCE, FontStyle.REGULAR);
TextPositionInImage textPosition;
if (canBeSnuggified(resultPage, resultToImageTransform)) {
textPosition = buildTextPositionInImageWithSnugBBox(word,
resultToPageTransform,
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE),
pageImage,
resultToImageTransform);
} else {
textPosition = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
word.getContent(),
resultToPageTransform,
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).font(),
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).fontStyle(),
false);
}
if (intersectsIgnoreZone(pageInformation.wordBBoxes(), textPosition)) {
textPosition.setOverlapsIgnoreZone(true);
}
Pix wordImage = extractWordImage(word, imageTransform, pageImage);
QuadPoint originTransformed = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(resultToImageTransform);
Pix wordImage = extractWordImage(originTransformed, pageImage);
if (wordImage == null) {
log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
continue;
}
IntBuffer pixelCount = IntBuffer.allocate(1);
Leptonica1.pixCountPixels(wordImage, pixelCount, null);
@ -162,9 +205,31 @@ public class WritableOcrResultFactory {
}
private static Pix extractWordImage(DocumentWord word, AffineTransform imageTransform, Pix pageImage) {
/**
 * Builds the transform that maps Azure result coordinates onto the processed page image:
 * the rotation correction for the page's detected skew, followed by a uniform scale from
 * the result's width to the image's pixel width (image height for sideways quadrants).
 */
@SneakyThrows
public static AffineTransform buildResultToImageTransform(DocumentPage resultPage, Pix pageImage) {
    double negatedAngle = -resultPage.getAngle();
    AffineTransform transform = new AffineTransform();
    transform.concatenate(RotationCorrectionUtility.buildTransform(negatedAngle, pageImage.w, pageImage.h));
    // quadrants 1 and 3 are rotated 90°/270°, so the image's height corresponds to the result's width
    boolean sideways = RotationCorrectionUtility.getQuadrantRotation(negatedAngle) % 2 == 1;
    double scalingFactor = (sideways ? pageImage.h : pageImage.w) / resultPage.getWidth();
    transform.scale(scalingFactor, scalingFactor);
    return transform;
}
public static Pix extractWordImage(QuadPoint wordPosition, Pix pageImage) {
Rectangle2D wordBBox = wordPosition.getBounds2D();
Rectangle2D pageBBox = new Rectangle2D.Double(0, 0, pageImage.w, pageImage.h);
if (!pageBBox.contains(wordBBox)) {
return null;
}
Rectangle2D wordBBox = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(imageTransform).getBounds2D();
Box box = new Box((int) wordBBox.getX(), (int) wordBBox.getY(), (int) wordBBox.getWidth(), (int) wordBBox.getHeight(), 1);
Pix wordImage = Leptonica1.pixClipRectangle(pageImage, box, null);
box.clear();
@ -172,19 +237,65 @@ public class WritableOcrResultFactory {
}
private List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {
/**
 * Builds the text positions for one page, tightening ("snuggifying") each word's bounding
 * box against the page image where enabled and possible; otherwise falls back to the raw
 * Azure polygon. Every position is also checked against the page's ignore zones.
 *
 * @param resultPage      Azure result page supplying the words
 * @param imageFile       processed page image the snug boxes are fitted against
 * @param pageCtm         transform from result coordinates to PDF page coordinates
 * @param lookups         bold/italic/handwritten span lookups for font-style resolution
 * @param pageInformation per-page data, including word bounding boxes of the ignore zones
 * @return one TextPositionInImage per word on the page
 */
public List<TextPositionInImage> buildTextWithSnugBBoxes(DocumentPage resultPage,
                                                         ImageFile imageFile,
                                                         AffineTransform pageCtm,
                                                         Lookups lookups,
                                                         PageInformation pageInformation) {
    Pix pageImage = imageFile.readPix();
    try {
        AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);
        boolean snuggify = canBeSnuggified(resultPage, resultToImageTransform);
        List<TextPositionInImage> list = new ArrayList<>();
        for (DocumentWord word : resultPage.getWords()) {
            FontInformation fontInformation = FontInformation.determineStyle(word, lookups);
            TextPositionInImage textPositionInImage;
            if (snuggify) {
                textPositionInImage = buildTextPositionInImageWithSnugBBox(word, pageCtm, fontInformation, pageImage, resultToImageTransform);
            } else {
                textPositionInImage = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
                        word.getContent(),
                        pageCtm,
                        fontInformation.font(),
                        fontInformation.fontStyle(),
                        false);
            }
            markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes());
            list.add(textPositionInImage);
        }
        return list;
    } finally {
        // fix: dispose the native Pix even when mapping a word throws (was leaked on exception)
        LeptUtils.disposePix(pageImage);
    }
}
/**
 * Snug (tight) bounding boxes are only attempted when the feature is enabled in the
 * service settings AND the snuggification service deems this page/transform eligible.
 */
private boolean canBeSnuggified(DocumentPage resultPage, AffineTransform resultToImageTransform) {
    return settings.isSnuggify() && BBoxSnuggificationService.canBeSnuggified(resultPage, resultToImageTransform);
}
/**
 * Builds the text positions for one page using the raw Azure polygons (no snug boxes),
 * marking each position that overlaps an ignore zone.
 */
public List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {
    return resultPage.getWords()
            .stream()
            .map(word -> buildPlainTextPosition(word, pageCtm, lookups))
            .map(textPositionInImage -> markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes()))
            .collect(Collectors.toList());
}

/** Maps one Azure word to a TextPositionInImage with its resolved font style. */
private static TextPositionInImage buildPlainTextPosition(DocumentWord word, AffineTransform pageCtm, Lookups lookups) {
    // resolve the style once per word (it was previously computed twice: once for font(), once for fontStyle())
    FontInformation fontInformation = FontInformation.determineStyle(word, lookups);
    return new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
            word.getContent(),
            pageCtm,
            fontInformation.font(),
            fontInformation.fontStyle(),
            false);
}
private static int getPageNumber(PageBatch pageOffset, DocumentPage resultPage) {
private static int getPageNumber(PageBatch pageBatch, DocumentPage resultPage) {
return pageOffset.getPageNumber(resultPage.getPageNumber());
return pageBatch.getPageNumber(resultPage.getPageNumber());
}
@ -214,36 +325,53 @@ public class WritableOcrResultFactory {
.flatMap(Collection::stream), Functions.identity());
return new Lookups(boldLookup, italicLookup, handWrittenLookup);
}
private TextPositionInImage buildTextPositionInImage(DocumentWord dw, AffineTransform imageCTM, Lookups lookups) {
@SneakyThrows
private TextPositionInImage buildTextPositionInImageWithSnugBBox(DocumentWord dw,
AffineTransform imageCTM,
FontInformation fontInformation,
Pix pageImage,
AffineTransform resultToImageTransform) {
boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
QuadPoint origin = QuadPoint.fromPolygons(dw.getPolygon());
FontStyle fontStyle;
FontMetricsProvider font;
if (handwritten) {
fontStyle = FontStyle.HANDWRITTEN;
font = regularFont;
} else if (italic && bold) {
fontStyle = FontStyle.BOLD_ITALIC;
font = boldItalicFont;
} else if (bold) {
fontStyle = FontStyle.BOLD;
font = boldFont;
} else if (italic) {
fontStyle = FontStyle.ITALIC;
font = italicFont;
} else {
fontStyle = FontStyle.REGULAR;
font = regularFont;
Optional<QuadPoint> snugBBox = BBoxSnuggificationService.snuggify(pageImage, dw, resultToImageTransform);
return new TextPositionInImage(snugBBox.orElse(origin), dw.getContent(), imageCTM, fontInformation.font(), fontInformation.fontStyle(), snugBBox.isPresent());
}
private record FontInformation(FontStyle fontStyle, FontMetricsProvider font) {
public static FontInformation determineStyle(DocumentWord dw, Lookups lookups) {
boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
FontStyle fontStyle;
FontMetricsProvider font;
if (handwritten) {
fontStyle = FontStyle.HANDWRITTEN;
font = Type0FontMetricsProvider.REGULAR_INSTANCE;
} else if (italic && bold) {
fontStyle = FontStyle.BOLD_ITALIC;
font = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
} else if (bold) {
fontStyle = FontStyle.BOLD;
font = Type0FontMetricsProvider.BOLD_INSTANCE;
} else if (italic) {
fontStyle = FontStyle.ITALIC;
font = Type0FontMetricsProvider.ITALIC_INSTANCE;
} else {
fontStyle = FontStyle.REGULAR;
font = Type0FontMetricsProvider.REGULAR_INSTANCE;
}
return new FontInformation(fontStyle, font);
}
return new TextPositionInImage(dw, imageCTM, font, fontStyle);
}
@ -307,7 +435,7 @@ public class WritableOcrResultFactory {
}
public static AffineTransform getPageCTM(PageInformation pageInformation, double imageWidth) {
public static AffineTransform buildResultToPageTransform(PageInformation pageInformation, double imageWidth) {
double scalingFactor = calculateScalingFactor(imageWidth, pageInformation);
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0);
@ -353,7 +481,7 @@ public class WritableOcrResultFactory {
}
private record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
public static Lookups empty() {

View File

@ -8,7 +8,7 @@ import lombok.SneakyThrows;
public interface FontMetricsProvider extends EmbeddableFont {
default FontMetrics calculateMetrics(String text, double textWidth, double textHeight) {
default FontMetrics calculateMetricsForAzureBBox(String text, double textWidth, double textHeight) {
HeightAndDescent heightAndDescent = calculateHeightAndDescent(text);
float fontSize = calculateFontSize(text, textWidth);
@ -18,6 +18,16 @@ public interface FontMetricsProvider extends EmbeddableFont {
}
/**
 * Calculates font metrics for a tightly fitted (snug) bounding box, i.e. one measured from
 * the actual glyph pixels rather than the looser box Azure reports (compare
 * calculateMetricsForAzureBBox). The height scaling stretches the font so that
 * (ascender height - descent) fills the tight box height exactly.
 *
 * NOTE(review): assumes heightAndDescent values are in 1/1000 font-size units (hence the
 * *1000 / /1000 factors) and that height() != descent(); a zero difference or zero
 * fontSize would divide by zero — confirm upstream guarantees.
 */
default FontMetrics calculateMetricsForTightBBox(String text, double textWidth, double textHeight) {
    HeightAndDescent heightAndDescent = calculateHeightAndDescent(text);
    float fontSize = calculateFontSize(text, textWidth);
    float heightScaling = (float) ((textHeight / (heightAndDescent.height() - heightAndDescent.descent())) * 1000) / fontSize;
    return new FontMetrics((heightAndDescent.descent() / 1000) * fontSize, fontSize, heightScaling);
}
@SneakyThrows
default float calculateFontSize(String text, double textWidth) {

View File

@ -42,7 +42,7 @@ import lombok.experimental.FieldDefaults;
public class IdpLayer extends IdpLayerConfig {
public static final int LINE_WIDTH = 1;
private Map<Integer, AffineTransform> pageCtms;
private Map<Integer, AffineTransform> resultToPageTransform;
public void addSection(int pageNumber, DocumentSection section, SpanLookup<DocumentWord> wordsOnPage) {
@ -65,7 +65,7 @@ public class IdpLayer extends IdpLayerConfig {
var sectionsOnPage = getOrCreateVisualizationsOnPage(pageNumber, vis);
sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(pageCtms.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(resultToPageTransform.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
}
@ -89,15 +89,20 @@ public class IdpLayer extends IdpLayerConfig {
if (keyValue.getValue() != null) {
addBoundingRegion(keyValue.getValue().getBoundingRegions(), keyValuePairs, VALUE_COLOR, pageOffset);
if (keyValue.getKey().getBoundingRegions().get(0).getPageNumber() != keyValue.getValue().getBoundingRegions().get(0).getPageNumber()) {
if (keyValue.getKey().getBoundingRegions()
.get(0).getPageNumber() != keyValue.getValue().getBoundingRegions()
.get(0).getPageNumber()) {
return;
}
int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions().get(0).getPageNumber());
QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions().get(0).getPolygon());
QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions().get(0).getPolygon());
int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions()
.get(0).getPageNumber());
QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions()
.get(0).getPolygon());
QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions()
.get(0).getPolygon());
var line = LineUtils.findClosestMidpointLine(key, value);
line = LineUtils.transform(line, pageCtms.get(pageNumberWithOffset));
line = LineUtils.transform(line, resultToPageTransform.get(pageNumberWithOffset));
var arrowHead = LineUtils.createArrowHead(line, Math.min(LineUtils.length(line), 5));
var linesOnPage = getOrCreateVisualizationsOnPage(pageNumberWithOffset, keyValuePairs).getColoredLines();
linesOnPage.add(new ColoredLine(line, KEY_VALUE_BBOX_COLOR, LINE_WIDTH));
@ -142,7 +147,7 @@ public class IdpLayer extends IdpLayerConfig {
private void addPolygon(int pageNumber, List<Double> polygon, Visualizations visualizations, Color color) {
VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, visualizations);
visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(pageCtms.get(pageNumber)), color));
visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(resultToPageTransform.get(pageNumber)), color));
}
@ -181,7 +186,8 @@ public class IdpLayer extends IdpLayerConfig {
var vis = getOrCreateVisualizationsOnPage(pageOffset.getPageNumber(boundingRegion.getPageNumber()), tables);
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon()).getTransformed(pageCtms.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon())
.getTransformed(resultToPageTransform.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));
vis.getFilledRectangles().add(new FilledRectangle(qp.getBounds2D(), TITLE_COLOR, 0.2f));

View File

@ -20,9 +20,9 @@ public class IdpLayerFactory {
private final IdpLayer idpLayer;
/**
 * @param resultToPageTransform per-page transforms from Azure result coordinates to PDF
 *                              page coordinates, used by the layer to place visualizations
 */
IdpLayerFactory(Map<Integer, AffineTransform> resultToPageTransform) {
    this.idpLayer = new IdpLayer(resultToPageTransform);
}

View File

@ -1,17 +1,21 @@
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.service.OcrExecutionSupervisor;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
import lombok.AccessLevel;
@ -26,32 +30,43 @@ public class LayerFactory {
OcrDebugLayerFactory ocrDebugLayerFactory;
OcrTextLayerFactory ocrTextLayerFactory;
OcrServiceSettings settings;
Set<AzureOcrFeature> features;
Map<Integer, Double> angles;
/**
 * Wires up the per-request layer factories.
 *
 * @param settings                service-level settings (debug flag, snuggify flag, ...)
 * @param features                features requested for this document (IDP, rotation correction, ...)
 * @param supervisor              notified when a page batch's result mapping is finished
 * @param pageInformation         per-page data keyed by page number
 * @param imageProcessingPipeline supplies the processed page images for snug-box fitting
 */
public LayerFactory(OcrServiceSettings settings,
                    Set<AzureOcrFeature> features,
                    OcrExecutionSupervisor supervisor,
                    Map<Integer, PageInformation> pageInformation,
                    ImageProcessingPipeline imageProcessingPipeline) {
    this.writableOcrResultFactory = new WritableOcrResultFactory(pageInformation, imageProcessingPipeline, settings, features);
    this.idpLayerFactory = new IdpLayerFactory(writableOcrResultFactory.getResultToPageTransforms());
    this.ocrDebugLayerFactory = new OcrDebugLayerFactory();
    this.ocrTextLayerFactory = new OcrTextLayerFactory();
    this.settings = settings;
    this.features = features;
    this.supervisor = supervisor;
    // written from concurrent batch processing, hence the synchronized wrapper
    this.angles = Collections.synchronizedMap(new HashMap<>());
}
public void addAnalyzeResult(PageBatch pageRange, AnalyzeResult analyzeResult) throws InterruptedException {
public void processAnalyzeResult(PageBatch batch, AnalyzeResult analyzeResult) throws InterruptedException {
List<WritableOcrResult> results = writableOcrResultFactory.buildOcrResultToWrite(analyzeResult, batch);
results.forEach(result -> angles.put(result.getPageNumber(), result.getAngle()));
List<WritableOcrResult> results = writableOcrResultFactory.buildOcrResultToWrite(analyzeResult, pageRange);
ocrTextLayerFactory.addWritableOcrResult(results);
if (settings.isDebug()) {
ocrDebugLayerFactory.addAnalysisResult(results);
}
if (settings.isIdpEnabled()) {
idpLayerFactory.addAnalyzeResult(analyzeResult, pageRange);
if (features.contains(AzureOcrFeature.IDP)) {
idpLayerFactory.addAnalyzeResult(analyzeResult, batch);
}
this.supervisor.finishMappingResult(pageRange);
this.supervisor.finishMappingResult(batch);
}
@ -64,10 +79,10 @@ public class LayerFactory {
if (settings.isDebug()) {
debugLayers.add(ocrDebugLayerFactory.getOcrDebugLayer());
}
if (settings.isIdpEnabled()) {
if (features.contains(AzureOcrFeature.IDP)) {
debugLayers.add(idpLayerFactory.getIdpLayer());
}
return new OcrResult(List.of(ocrTextLayer), debugLayers);
return new OcrResult(List.of(ocrTextLayer), debugLayers, angles);
}
}

View File

@ -31,7 +31,7 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
word.getFontMetricsProvider(),
Optional.of(word.getTextMatrix()),
Optional.of(RenderingMode.FILL)));
bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox()));
bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox(), word.isSnugBBox()));
}
@ -57,4 +57,11 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
};
}
/** Marks the OCR debug layer as visible by default. */
@Override
public boolean isVisibleByDefault() {
    // NOTE(review): debug layer visible by default — confirm this is intended outside of debugging
    return true;
}
}

View File

@ -1,9 +1,10 @@
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
import java.util.List;
import java.util.Map;
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers) {
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers, Map<Integer, Double> anglesPerPage) {
}

View File

@ -14,12 +14,19 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class LineUtils {
public List<ColoredLine> quadPointAsLines(QuadPoint rect) {
public List<ColoredLine> quadPointAsLines(QuadPoint rect, boolean tight) {
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.ORANGE, 1),
if (tight) {
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.GREEN, 1));
}
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.BLUE, 1),
new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1),
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1));
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.BLUE, 1),
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.BLUE, 1));
}

View File

@ -0,0 +1,205 @@
package com.knecon.fforesight.service.ocr.processor.visualizations.utils;
import java.awt.geom.AffineTransform;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import com.pdftron.sdf.Obj;
import com.pdftron.sdf.SDFDoc;

import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
/**
 * Bakes rotation corrections into a PDF document. Full 90° quadrants are applied via the
 * page's /Rotate entry; the remaining (sub-quadrant) skew is compensated by wrapping the
 * page content in a marked-content section (see {@link #KNECON_ROTATION_CORRECTION})
 * that rotates and scales around the page center, so the correction can later be
 * stripped again by a content cleaner.
 */
@UtilityClass
public class RotationCorrectionUtility {

    public static final LayerIdentifier KNECON_ROTATION_CORRECTION = new LayerIdentifier(null, "ROTATION_CORRECTION");

    /**
     * Rotates pages of {@code inputFile} by the given angles (degrees, keyed by 1-based
     * page number) and writes the result to {@code outputFile}. Operates on a temporary
     * copy, so input and output may point to the same path.
     */
    @SneakyThrows
    public void rotatePages(Path inputFile, Path outputFile, Map<Integer, Double> anglesPerPage) {
        Path tmp = Files.createTempFile("tempDocument", ".pdf");
        try {
            Files.copy(inputFile, tmp, StandardCopyOption.REPLACE_EXISTING);
            try (var in = new FileInputStream(tmp.toFile()); var out = new FileOutputStream(outputFile.toFile())) {
                rotatePages(in, out, anglesPerPage);
            }
        } finally {
            // fix: delete the temp copy even when rotating/saving throws (previously leaked)
            Files.deleteIfExists(tmp);
        }
    }

    /** Stream variant of {@link #rotatePages(Path, Path, Map)}; saves the document linearized. */
    @SneakyThrows
    public void rotatePages(InputStream in, OutputStream out, Map<Integer, Double> anglesPerPage) {
        try (PDFDoc doc = new PDFDoc(in)) {
            anglesPerPage.forEach((pageNumber, angle) -> rotatePage(pageNumber, doc, angle));
            doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
        }
    }

    /**
     * Applies {@code angle} (degrees) to one page: the quadrant part goes into the page's
     * /Rotate entry, the remainder is prepended to the content stream as a rotation/scale
     * transformation and closed by a trailing "Q" stream.
     */
    @SneakyThrows
    public void rotatePage(int pageNumber, PDFDoc doc, double angle) {
        int quadrants = getQuadrantRotation(angle);
        Page page = doc.getPage(pageNumber);
        // PDFTron page rotation constants are quadrant counts 0..3, hence the modulo arithmetic
        page.setRotation((quadrants + page.getRotation()) % 4);
        double remainingAngle = getRemainingAngle(angle, quadrants);
        Obj contents = page.getContents();
        String content = buildRotationContent(remainingAngle, page);
        Obj rotationStream = doc.createIndirectStream(content.getBytes());
        Obj newContentsArray = doc.createIndirectArray();
        newContentsArray.pushBack(rotationStream);
        addPreviousContents(contents, newContentsArray);
        String closingContent = buildClosingContent();
        Obj closingStream = doc.createIndirectStream(closingContent.getBytes());
        newContentsArray.pushBack(closingStream);
        page.getSDFObj().erase("Contents");
        page.getSDFObj().put("Contents", newContentsArray);
    }

    /** Restores the graphics state opened by {@link #buildRotationContent(double, Page)}. */
    private String buildClosingContent() {
        return "Q";
    }

    /**
     * Builds the PDF operators that rotate (and, for larger angles, shrink) the page
     * content around its center, wrapped in a BDC/EMC marked-content section so the
     * correction can be identified and removed later.
     */
    private String buildRotationContent(double angle, Page page) throws PDFNetException {
        List<String> commands = new LinkedList<>();
        double scale = getScalingFactor(angle, page);
        commands.add("q");
        commands.add("/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName()));
        commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(page.getPageWidth() / 2, page.getPageHeight() / 2)));
        commands.add(buildMatrixCommands(AffineTransform.getRotateInstance(Math.toRadians(angle))));
        commands.add(buildMatrixCommands(AffineTransform.getScaleInstance(scale, scale)));
        commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-page.getPageWidth() / 2, -page.getPageHeight() / 2)));
        commands.add("EMC");
        return String.join("\n", commands);
    }

    /** Re-attaches the page's previous content stream(s) behind the rotation prelude. */
    private void addPreviousContents(Obj contents, Obj newContentsArray) throws PDFNetException {
        switch (contents.getType()) {
            case Obj.e_array -> {
                for (int i = 0; i < contents.size(); i++) {
                    newContentsArray.pushBack(contents.getAt(i));
                }
            }
            case Obj.e_stream -> newContentsArray.pushBack(contents);
            default -> throw new IllegalStateException("Unexpected value: " + contents.getType());
        }
    }

    public static double getScalingFactor(double angle, Page page) throws PDFNetException {
        double width = page.getPageWidth();
        double height = page.getPageHeight();
        return getScalingFactor(angle, width, height);
    }

    /**
     * Scaling needed so a w×h page rotated by {@code angle} (degrees) still fits its
     * original bounds. Angles below 20° are not scaled at all, avoiding visible shrinking
     * for slightly skewed pages.
     */
    public static double getScalingFactor(double angle, double w, double h) {
        if (Math.abs(angle) < 20) {
            return 1;
        }
        double sin = Math.abs(Math.sin(Math.toRadians(angle)));
        double cos = Math.abs(Math.cos(Math.toRadians(angle)));
        double newWidth = w * cos + h * sin;
        double newHeight = h * cos + w * sin;
        return Math.min(w / newWidth, h / newHeight);
    }

    /**
     * Builds the pixel-space transform for {@code angle}: a quadrant rotation (dimensions
     * swapped for 90°/270°) followed by the sub-quadrant remainder rotation, scaled to
     * keep the content inside the original bounds.
     */
    public static AffineTransform buildTransform(double angle, double originalWidth, double originalHeight) {
        int quadrants = getQuadrantRotation(angle);
        double h = originalHeight;
        double w = originalWidth;
        if (quadrants == 1 || quadrants == 3) {
            w = originalHeight;
            h = originalWidth;
        }
        AffineTransform quadrantRotation = switch (quadrants) {
            case 1 -> new AffineTransform(0, 1, -1, 0, h, 0);
            case 2 -> new AffineTransform(-1, 0, 0, -1, w, h);
            // NOTE(review): the "w - h" x-offset in the 270° case is asymmetric to case 1 —
            // confirm against a rotated non-square sample page
            case 3 -> new AffineTransform(0, -1, 1, 0, w - h, h);
            default -> new AffineTransform();
        };
        double remainder = getRemainingAngle(angle, quadrants);
        double scale = getScalingFactor(remainder, w, h);
        AffineTransform transform = new AffineTransform();
        transform.translate(w / 2, h / 2);
        transform.rotate(Math.toRadians(remainder));
        transform.scale(scale, scale);
        transform.translate(-w / 2, -h / 2);
        transform.concatenate(quadrantRotation);
        return transform;
    }

    /** Maps an arbitrary angle (degrees) to the nearest quadrant 0..3 (±45° bands, normalized to [0,360)). */
    public static int getQuadrantRotation(double angle) {
        double remainder = angle % 360;
        if (remainder < 0) {
            remainder += 360;
        }
        if (remainder > 315 || remainder <= 45) {
            return 0;
        } else if (remainder > 45 && remainder <= 135) {
            return 1;
        } else if (remainder > 135 && remainder <= 225) {
            return 2;
        } else {
            return 3;
        }
    }

    /** Remainder (degrees) of {@code angle} after subtracting the given quadrant count. */
    public static double getRemainingAngle(double angle, int quadrants) {
        double referenceAngle = 90 * quadrants;
        return angle - referenceAngle;
    }

    public static double getRemainingAngle(double angle) {
        return getRemainingAngle(angle, getQuadrantRotation(angle));
    }

    /** Formats an AffineTransform as a PDF "cm" (concat matrix) operator. */
    private String buildMatrixCommands(AffineTransform at) {
        // fix: Locale.ROOT guarantees '.' decimal separators; the previous default-locale
        // "%f".formatted(...) could emit ',' (e.g. German locale), yielding an invalid content stream
        return String.format(Locale.ROOT, "%f %f %f %f %f %f cm",
                at.getScaleX(), at.getShearX(), at.getShearY(), at.getScaleY(), at.getTranslateX(), at.getTranslateY());
    }
}

View File

@ -4,8 +4,8 @@ import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.HashSet;
import java.util.Set;
import java.util.LinkedList;
import java.util.List;
import org.apache.pdfbox.Loader;
import org.junit.jupiter.api.BeforeEach;
@ -13,11 +13,16 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.GhostScriptService;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingService;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.PDFNet;
import com.sun.jna.NativeLibrary;
import lombok.SneakyThrows;
@ -31,10 +36,7 @@ class ImageProcessingPipelineTest {
@BeforeEach
public void setup() {
System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB"));
try (NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica")) {
assert leptonicaLib != null;
}
new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a").init();
ImageProcessingService imageProcessingService = new ImageProcessingService();
GhostScriptService ghostScriptService = new GhostScriptService();
@ -46,7 +48,7 @@ class ImageProcessingPipelineTest {
@SneakyThrows
public void testImageProcessingPipeline() {
String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340.pdf";
String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340/VV-331340_OCRED_first15.pdf";
File file;
if (fileName.startsWith("files")) {
@ -63,21 +65,26 @@ class ImageProcessingPipelineTest {
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
int numberOfpages;
try (var doc = Loader.loadPDF(file)) {
numberOfpages = doc.getNumberOfPages();
}
Set<Integer> pageNumbers = new HashSet<>();
for (int i = 1; i <= numberOfpages; i++) {
if (i % 2 == 0) {
continue;
try (var doc = new PDFDoc(fileName)) {
List<Integer> pageNumbers = new LinkedList<>();
for (int i = 1; i <= doc.getPageCount(); i++) {
if (i % 2 == 0) {
continue;
}
pageNumbers.add(i);
}
pageNumbers.add(i);
PageBatch batch = BatchFactory.create(0, doc, pageNumbers, tmpDir);
ImageProcessingSupervisor supervisor = imageProcessingPipeline.addToPipeline(batch);
batch.forEach(pageNumber -> {
try {
assert supervisor.awaitProcessedPage(pageNumber) != null;
} catch (Exception e) {
e.printStackTrace();
}
});
}
ImageProcessingSupervisor supervisor = imageProcessingPipeline.run(pageNumbers, tmpDir.resolve("images"), documentFile.toFile());
supervisor.awaitAll();
}
}

View File

@ -0,0 +1,70 @@
package com.knecon.fforesight.service.ocr.processor.service;
import static com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility.KNECON_ROTATION_CORRECTION;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.viewerdoc.service.PageContentCleaner;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.PDFNet;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator;
import com.pdftron.sdf.SDFDoc;
import lombok.SneakyThrows;
/**
 * Manual round-trip test for RotationCorrectionUtility: putRotation bakes a sweep of
 * angles into a local sample PDF, removeRotation strips the marked-content correction
 * again. Uses hardcoded local paths and a demo license, hence disabled in CI.
 */
@Disabled // leptonica is not available in build server
public class PageRotationTest {

    @BeforeAll
    public static void setUp() {
        // NOTE(review): demo license key committed in source — consider externalizing
        PDFNet.initialize("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a");
    }

    /** Rotates pages 1..100 with angles sweeping from just above -90° to +90°. */
    @Test
    public void putRotation() {
        Map<Integer, Double> angles = new HashMap<>();
        for (int i = 1; i <= 100; i++) {
            double a = -90 + (i * ((double) 180 / 100));
            angles.put(i, a);
        }
        Path inputFile = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
        RotationCorrectionUtility.rotatePages(inputFile, Path.of("/tmp").resolve(inputFile.getFileName() + "_rotated.pdf"), angles);
    }

    /** Strips the KNECON_ROTATION_CORRECTION marked content from the file produced by putRotation. */
    @Test
    @SneakyThrows
    public void removeRotation() {
        Path inputFile = Path.of("/tmp/VV-331340-first100.pdf_rotated.pdf");
        try (var doc = new PDFDoc(inputFile.toFile()
                .toString()); var reader = new ElementReader(); var writer = new ElementWriter(); PageIterator pageIterator = doc.getPageIterator()) {
            PageContentCleaner cleaner = PageContentCleaner.builder()
                    .reader(reader)
                    .writer(writer)
                    .markedContentToRemove(Set.of(KNECON_ROTATION_CORRECTION.markedContentName()))
                    .build();
            while (pageIterator.hasNext()) {
                Page page = pageIterator.next();
                cleaner.removeMarkedContent(page);
            }
            doc.save(inputFile.resolveSibling(inputFile.getFileName() + "_derotated.pdf").toFile().toString(), SDFDoc.SaveMode.LINEARIZED, null);
        }
    }
}

View File

@ -0,0 +1,246 @@
package com.knecon.fforesight.service.ocr.processor.service;
import static com.knecon.fforesight.service.ocr.processor.service.OCRService.IMAGE_PIPELINE_DIR;
import static com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline.PROCESSED_DIR;
import java.awt.Color;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.json.JsonOptions;
import com.azure.json.JsonReader;
import com.azure.json.implementation.DefaultJsonReader;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrDebugLayerFactory;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
import com.pdftron.pdf.ColorPt;
import com.pdftron.pdf.ColorSpace;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import com.sun.jna.Memory;
import com.sun.jna.Native;
import com.sun.jna.Pointer;
import com.sun.jna.ptr.PointerByReference;
import lombok.SneakyThrows;
import net.sourceforge.lept4j.Box;
import net.sourceforge.lept4j.Boxa;
import net.sourceforge.lept4j.util.LeptUtils;
/**
 * Manual debugging harness for the "snug bounding boxes" feature.
 *
 * <p>Reads a pre-rendered page image and a previously stored Azure OCR analyze result from a
 * developer machine, rebuilds tightly fitting word boxes via
 * {@code WritableOcrResultFactory#buildTextWithSnugBBoxes} and writes a PDF with a debug overlay
 * layer for visual inspection. All paths are hard-coded to a local test setup; this is not part
 * of the CI suite.
 */
@Disabled // leptonica is not available in build server
public class SnugBoxesTest {

    public static final int PAGE_NUMBER = 41;
    public static final Path ORIGIN_FILE = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
    public static final Path TEST_FOLDER = Path.of("/tmp/OCR_TEST/").resolve(ORIGIN_FILE.getFileName());
    public static final Path PROCESSED_FOLDER = TEST_FOLDER.resolve(IMAGE_PIPELINE_DIR).resolve(PROCESSED_DIR);
    public static final Path DESTINATION_FILE = TEST_FOLDER.resolve("SnugBoxesTest.pdf");
    public static final Path RESULT_FILE = TEST_FOLDER.resolve(IMAGE_PIPELINE_DIR).resolve("azure_result_0.json");

    PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);

    @BeforeAll
    public static void setUp() {
        // Native libraries (PDFTron etc.) must be initialised exactly once per JVM before any PDF call.
        new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a").init();
    }

    /**
     * End-to-end debug run: load cached OCR result + page image, compute snug word boxes,
     * render them as a debug layer into {@link #DESTINATION_FILE} and apply rotation correction.
     */
    @Test
    @SneakyThrows
    public void snugBoxes() {

        String filePath = ORIGIN_FILE.toFile().toString();
        // Pre-rendered page image produced by an earlier pipeline run; %04d matches its naming scheme.
        File file = PROCESSED_FOLDER.resolve("output_0.%04d.tiff".formatted(PAGE_NUMBER)).toFile();
        assert file.exists();
        ImageFile imageFile = new ImageFile(PAGE_NUMBER, file.toString());

        // Deserialize the stored Azure response instead of re-running the (paid) analysis.
        AnalyzeResult result;
        try (var in = new FileInputStream(RESULT_FILE.toFile()); JsonReader reader = DefaultJsonReader.fromStream(in, new JsonOptions())) {
            result = AnalyzeResult.fromJson(reader);
        }
        var resultPage = result.getPages().get(PAGE_NUMBER - 1); // Azure page numbers are 1-based

        WritableOcrResultFactory writableOcrResultFactory = new WritableOcrResultFactory(null, null, new OcrServiceSettings(), Set.of());
        OcrDebugLayerFactory debugLayerFactory = new OcrDebugLayerFactory();

        // Strip invisible elements first so the debug overlay is drawn onto a clean document.
        InvisibleElementRemovalService invisibleElementRemovalService = new InvisibleElementRemovalService();
        try (var in = new FileInputStream(ORIGIN_FILE.toFile()); var out = new FileOutputStream(DESTINATION_FILE.toFile())) {
            invisibleElementRemovalService.removeInvisibleElements(in, out, false);
        }

        PageInformation pageInformation = getPageInformation(PAGE_NUMBER, DESTINATION_FILE.toFile().toString());
        WritableOcrResultFactory.Lookups empty = WritableOcrResultFactory.Lookups.empty();
        // NOTE(review): the CTM is derived from ORIGIN_FILE while pageInformation is read from
        // DESTINATION_FILE — presumably the geometry is identical after invisible-element removal;
        // confirm if results look shifted.
        AffineTransform pageCtm = getPageCtm(PAGE_NUMBER, filePath, resultPage.getWidth());

        // Earlier leptonica/JNA experiments (drawing Boxa overlays directly onto the page image)
        // were removed from this test; recover them from the git history of this file if needed.
        List<TextPositionInImage> words = writableOcrResultFactory.buildTextWithSnugBBoxes(resultPage, imageFile, pageCtm, empty, pageInformation);
        var results = new WritableOcrResult(PAGE_NUMBER, -resultPage.getAngle(), words, Collections.emptyList());
        debugLayerFactory.addAnalysisResult(List.of(results));

        viewerDocumentService.addLayerGroups(DESTINATION_FILE.toFile(), DESTINATION_FILE.toFile(), List.of(debugLayerFactory.getOcrDebugLayer()));
        RotationCorrectionUtility.rotatePages(DESTINATION_FILE, DESTINATION_FILE, Map.of(PAGE_NUMBER, -resultPage.getAngle()));
    }

    /**
     * Draws the outline of each quad point onto the given page as an overlay.
     * Kept as a manual debugging helper; currently not invoked by the test itself.
     *
     * @param doc        open document to draw into
     * @param quadPoints quads (already in page space) whose edges are stroked
     * @param pageNumber 1-based page to draw on
     */
    @SneakyThrows
    private void drawRects(PDFDoc doc, List<QuadPoint> quadPoints, int pageNumber) {

        try (ElementWriter writer = new ElementWriter(); ElementBuilder builder = new ElementBuilder()) {
            Page page = doc.getPage(pageNumber);
            writer.begin(page, ElementWriter.e_overlay);
            for (QuadPoint quadPoint : quadPoints) {
                quadPoint.asLines().forEach(line -> drawLine(line, builder, writer));
            }
            writer.end();
        }
    }

    /** Strokes a single blue 1pt line into the content stream being written. */
    @SneakyThrows
    private static void drawLine(Line2D l, ElementBuilder builder, ElementWriter writer) {

        float[] rgbComponents = Color.BLUE.getRGBColorComponents(null);
        builder.pathBegin();
        builder.moveTo(l.getX1(), l.getY1());
        builder.lineTo(l.getX2(), l.getY2());
        Element line = builder.pathEnd();
        line.setPathStroke(true);
        line.setPathFill(false);
        line.getGState().setLineWidth(1);
        line.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
        try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
            line.getGState().setStrokeColor(color);
        }
        writer.writeElement(line);
    }

    /**
     * Builds the transform mapping Azure result coordinates to PDF page space.
     *
     * @param pageNumber 1-based page number
     * @param file       path of the PDF to read page geometry from
     * @param imageWidth width of the Azure result page (was misspelled {@code imageWidh})
     */
    @SneakyThrows
    private static AffineTransform getPageCtm(int pageNumber, String file, double imageWidth) {

        return WritableOcrResultFactory.buildResultToPageTransform(getPageInformation(pageNumber, file), imageWidth);
    }

    /** Reads the geometry information of the given 1-based page from the given PDF. */
    @SneakyThrows
    private static PageInformation getPageInformation(int pageNumber, String file) {

        try (var in = new FileInputStream(file); var doc = new PDFDoc(in)) {
            return PageInformation.fromPage(pageNumber, doc.getPage(pageNumber));
        }
    }
}

View File

@ -20,7 +20,7 @@ class Type0FontMetricsProviderTest {
try (PDDocument document = Loader.loadPDF(new File(Type0FontMetricsProviderTest.class.getClassLoader().getResource("InvisibleText.pdf").getPath()))) {
Type0FontMetricsProvider metricsFactory = Type0FontMetricsProvider.regular(document);
FontMetrics fontMetrics = metricsFactory.calculateMetrics("deine mutter", 100, 50);
FontMetrics fontMetrics = metricsFactory.calculateMetricsForAzureBBox("deine mutter", 100, 50);
}
}

View File

@ -8,6 +8,9 @@ plugins {
id("org.sonarqube") version "4.3.0.3225"
id("io.freefair.lombok") version "8.4"
}
pmd {
isConsoleOutput = true
}
configurations {
all {

View File

@ -7,6 +7,7 @@ import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import org.slf4j.MDC;
import org.springframework.amqp.AmqpRejectAndDontRequeueException;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
@ -54,8 +55,9 @@ public class OcrMessageReceiver {
Path tmpDir = Files.createTempDirectory(null);
try {
MDC.put("fileId", fileId);
log.info("--------------------------------------------------------------------------");
log.info("Start ocr for file with dossierId {} and fileId {}", dossierId, fileId);
log.info("Starting OCR");
ocrMessageSender.sendOCRStarted(fileId);
@ -65,7 +67,7 @@ public class OcrMessageReceiver {
fileStorageService.downloadFiles(request, documentFile);
ocrService.runOcrOnDocument(dossierId, fileId, request.isRemoveWatermark(), tmpDir, documentFile, viewerDocumentFile, analyzeResultFile);
ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), tmpDir, documentFile, viewerDocumentFile, analyzeResultFile);
fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, analyzeResultFile);
@ -76,6 +78,7 @@ public class OcrMessageReceiver {
in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER, OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
throw new RuntimeException(e);
} finally {
MDC.remove("fileId");
FileSystemUtils.deleteRecursively(tmpDir);
}
}

View File

@ -7,11 +7,21 @@
<include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
<appender name="JSON" class="ch.qos.logback.core.ConsoleAppender">
<encoder class="net.logstash.logback.encoder.LogstashEncoder"/>
<encoder class="net.logstash.logback.encoder.LogstashEncoder">
<pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level%logger{36} - %msg%n</pattern>
</encoder>
</appender>
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level%logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="${logType}"/>
</root>
<logger name="com.iqser.red.pdftronlogic.commons" level="ERROR"/>
</configuration>

View File

@ -8,7 +8,9 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.MockitoAnnotations;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.amqp.rabbit.core.RabbitAdmin;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.amqp.rabbit.listener.RabbitListenerEndpointRegistry;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
@ -52,8 +54,13 @@ public class AbstractTest {
@MockBean
protected RabbitTemplate rabbitTemplate;
private static String pdftronLicense;
@MockBean
private RabbitAdmin rabbitAdmin;
@MockBean
private RabbitListenerEndpointRegistry rabbitListenerEndpointRegistry;
private static String pdftronLicense;
@BeforeEach
public void openMocks() {

View File

@ -9,23 +9,28 @@ import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.MDC;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.ocr.processor.service.OCRService;
import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import lombok.SneakyThrows;
@Disabled // in order to run, the azure.key must be set first in the application.yml
@Disabled
// in order to run, the azure.key must be set first in the application.yml and you must set the env variable VCPKG_DYNAMIC_LIB to your tesseract and leptonica installation folder
@SpringBootTest()
public class OcrServiceIntegrationTest extends AbstractTest {
public static final Set<AzureOcrFeature> FEATURES = Set.of(AzureOcrFeature.ROTATION_CORRECTION, AzureOcrFeature.FONT_STYLE_DETECTION);
@Autowired
private OCRService ocrService;
@ -34,7 +39,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
public void testOcrWith2000PageFile() {
testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/brokenText.pdf");
}
@ -50,7 +55,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
public void testOcrWithFile() {
testOCR("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/1.A16148F - Toxicidade oral aguda.pdf");
testOCR("/home/kschuettler/Dokumente/LayoutparsingEvaluation/RAW_FILES/Difficult Headlines/VV-284053.pdf/VV-284053.pdf.ORIGIN.pdf");
}
@ -58,7 +63,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
public void testOcrWithFolder() {
String dir = "/home/kschuettler/Dokumente/TestFiles/BASF/Documine_Test_docs/2013-1110704.pdf";
String dir = "/home/kschuettler/Dokumente/TestFiles/OCR/TestSet";
List<File> foundFiles = Files.walk(Path.of(dir))
.sorted(Comparator.comparingLong(this::getFileSize))
.map(Path::toFile)
@ -97,6 +102,8 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
private String testOCR(File file) {
MDC.put("fileId", "test");
Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve("OCR_TEST").resolve(file.toPath().getFileName());
assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs();
@ -108,7 +115,8 @@ public class OcrServiceIntegrationTest extends AbstractTest {
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", FEATURES, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
MDC.remove("fileId");
System.out.println("File:" + documentFile);
System.out.println("\n\n");
try (var fileStream = new FileInputStream(documentFile.toFile())) {

View File

@ -28,7 +28,7 @@ if [ -z "$1" ]; then
fi
namespace=${1}
deployment_name="ocr-service-v1"
deployment_name="azure-ocr-service"
echo "deploying to ${namespace}"