Compare commits

...

19 Commits

Author SHA1 Message Date
Kilian Schüttler
5fca39728b Merge branch 'RED-10365' into 'master'
RED-10365: update pdftron logic commons to avoid crash for specific file

See merge request fforesight/ocr-service!59
2024-11-06 09:25:26 +01:00
Kilian Schuettler
cd6390fde1 RED-10365: update pdftron logic commons to avoid crash for specific file 2024-11-06 09:09:50 +01:00
Kilian Schüttler
bc459ee966 Merge branch 'RED-9864' into 'master'
RED-9864: sped up invisible element removal, fixed crash

See merge request fforesight/ocr-service!58
2024-08-26 15:27:07 +02:00
Kilian Schuettler
47e7f8b297 RED-9864: sped up invisible element removal, fixed crash 2024-08-26 15:23:11 +02:00
Kilian Schüttler
22392e083d Merge branch 'RED-9746' into 'master'
RED-9746: update pdftron-logic-commons version

See merge request fforesight/ocr-service!57
2024-08-20 09:43:58 +02:00
Kilian Schuettler
52a1fb4a05 RED-9746: update pdftron-logic-commons version
* fix build
2024-08-19 13:41:46 +02:00
Kilian Schüttler
378436cb2f Merge branch 'RED-8800' into 'master'
RED-8800: adjust coords to cropbox

See merge request fforesight/ocr-service!55
2024-07-15 17:51:06 +02:00
Kilian Schuettler
f1204acc60 RED-8800: adjust coords to cropbox 2024-07-15 17:46:50 +02:00
Andrei Isvoran
998755c3e3 Merge branch 'RED-9496' into 'master'
RED-9496 - Implement graceful shutdown

See merge request fforesight/ocr-service!54
2024-07-04 12:35:01 +02:00
Andrei Isvoran
c598f62633 RED-9496 - Implement graceful shutdown 2024-07-04 12:17:12 +03:00
Corina Olariu
2e25ee2155 Merge branch 'RED-8701-deletefile' into 'master'
RED-8701 - Move files to customer data repositories

See merge request fforesight/ocr-service!53
2024-05-17 09:56:29 +02:00
Corina Olariu
7f04fb3c6f RED-8701 - Move files to customer data repositories
- remove one customer file (single page)
2024-05-17 10:48:10 +03:00
Andrei Isvoran
ff32f016eb Merge branch 'RED-9157-tracing' into 'master'
RED-9157 - Update tracing

See merge request fforesight/ocr-service!52
2024-05-15 09:59:00 +02:00
Andrei Isvoran
821ef265fe RED-9157 - Update tracing 2024-05-15 10:40:31 +03:00
Kilian Schüttler
7fcb6652ef Merge branch 'RED-7669' into 'master'
RED-7669: improve ocr

See merge request fforesight/ocr-service!51
2024-05-13 15:03:06 +02:00
Kilian Schuettler
61b1010e24 RED-7669: improve ocr
* fix pmd
2024-05-13 12:59:40 +02:00
Kilian Schuettler
7b5a175440 RED-7669: improve ocr
* fix pmd
2024-05-13 11:35:57 +02:00
Kilian Schuettler
18ba1daaef RED-7669: improve ocr
* decrease otsu-scorefract slightly for thin lines
* don't write text that is overlapped with existing text
2024-05-08 10:55:38 +02:00
Kilian Schuettler
c61f71871e RED-7669: improve ocr
* decrease otsu-scorefract slightly for thin lines
* don't write text that is overlapped with existing text
2024-05-08 10:54:25 +02:00
13 changed files with 219 additions and 69 deletions

View File

@ -15,6 +15,7 @@ dependencies {
api("com.iqser.red.commons:metric-commons:2.1.0") api("com.iqser.red.commons:metric-commons:2.1.0")
api("com.iqser.red.commons:storage-commons:2.45.0") api("com.iqser.red.commons:storage-commons:2.45.0")
api("com.knecon.fforesight:tenant-commons:0.21.0") api("com.knecon.fforesight:tenant-commons:0.21.0")
api("com.knecon.fforesight:lifecycle-commons:0.6.0")
api("com.pdftron:PDFNet:10.5.0") api("com.pdftron:PDFNet:10.5.0")
api("org.apache.pdfbox:pdfbox:3.0.0") api("org.apache.pdfbox:pdfbox:3.0.0")
api("org.apache.pdfbox:jbig2-imageio:3.0.4") api("org.apache.pdfbox:jbig2-imageio:3.0.4")
@ -24,7 +25,7 @@ dependencies {
api("io.github.karols:hocr4j:0.2.0") api("io.github.karols:hocr4j:0.2.0")
api("com.amazonaws:aws-java-sdk-kms:1.12.440") api("com.amazonaws:aws-java-sdk-kms:1.12.440")
api("com.google.guava:guava:31.1-jre") api("com.google.guava:guava:31.1-jre")
api("com.iqser.red.commons:pdftron-logic-commons:2.27.0") api("com.iqser.red.commons:pdftron-logic-commons:2.32.0")
api("com.knecon.fforesight:viewer-doc-processor:0.89.0") api("com.knecon.fforesight:viewer-doc-processor:0.125.0")
testImplementation("org.junit.jupiter:junit-jupiter:5.8.1") testImplementation("org.junit.jupiter:junit-jupiter:5.8.1")
} }

View File

@ -1,17 +1,20 @@
package com.knecon.fforesight.service.ocr.processor.initializer; package com.knecon.fforesight.service.ocr.processor.initializer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.pdftron.pdf.PDFNet; import com.pdftron.pdf.PDFNet;
import com.sun.jna.NativeLibrary; import com.sun.jna.NativeLibrary;
import jakarta.annotation.PostConstruct; import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import org.springframework.beans.factory.annotation.Value; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
@Slf4j
@Component @Component
@RequiredArgsConstructor @RequiredArgsConstructor
public class PDFNetInitializer { public class NativeLibrariesInitializer {
@Value("${pdftron.license:}") @Value("${pdftron.license:}")
private String pdftronLicense; private String pdftronLicense;
@ -22,8 +25,25 @@ public class PDFNetInitializer {
// Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError. // Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError.
public void init() { public void init() {
log.info("Initializing Native Libraries");
log.info("Setting pdftron license: {}", pdftronLicense);
PDFNet.setTempPath("/tmp/pdftron"); PDFNet.setTempPath("/tmp/pdftron");
PDFNet.initialize(pdftronLicense); PDFNet.initialize(pdftronLicense);
log.info("Setting jna.library.path: {}", System.getenv("VCPKG_DYNAMIC_LIB"));
System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB")); System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB"));
log.info("Asserting Native Libraries loaded");
try (NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica")) {
assert leptonicaLib != null;
log.info("Leptonica library loaded from {}", leptonicaLib.getFile().getAbsolutePath());
}
try (NativeLibrary tesseractLib = NativeLibrary.getInstance("tesseract")) {
assert tesseractLib != null;
log.info("Tesseract library loaded from {}", tesseractLib.getFile().getAbsolutePath());
}
} }
} }

View File

@ -1,5 +1,6 @@
package com.knecon.fforesight.service.ocr.processor.model; package com.knecon.fforesight.service.ocr.processor.model;
import java.awt.Rectangle;
import java.awt.geom.AffineTransform; import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D; import java.awt.geom.Line2D;
import java.awt.geom.Point2D; import java.awt.geom.Point2D;
@ -34,6 +35,16 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
new Point2D.Double(bounds.getRight(), bounds.getBottom())); new Point2D.Double(bounds.getRight(), bounds.getBottom()));
} }
/**
 * Computes the axis-aligned bounding box that encloses all four corner points of this quad.
 *
 * @return a rectangle whose origin is the smallest x/y coordinate found among a, b, c and d
 *         and whose size is the difference between the largest and smallest coordinates
 */
public Rectangle2D getBounds2D() {

    // Reduce the four corners pairwise; min/max are order-independent, so the result is unchanged.
    double lowX = Math.min(Math.min(a.getX(), b.getX()), Math.min(c.getX(), d.getX()));
    double lowY = Math.min(Math.min(a.getY(), b.getY()), Math.min(c.getY(), d.getY()));
    double highX = Math.max(Math.max(a.getX(), b.getX()), Math.max(c.getX(), d.getX()));
    double highY = Math.max(Math.max(a.getY(), b.getY()), Math.max(c.getY(), d.getY()));

    double width = highX - lowX;
    double height = highY - lowY;
    return new Rectangle2D.Double(lowX, lowY, width, height);
}
public QuadPoint getTransformed(AffineTransform at) { public QuadPoint getTransformed(AffineTransform at) {

View File

@ -24,7 +24,7 @@ public class RenderedPageOcrImage implements OcrImage {
public AffineTransform getImageCTM() { public AffineTransform getImageCTM() {
double scalingFactor = calculateScalingFactor(); double scalingFactor = calculateScalingFactor();
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, -pageInformation.minX(), -pageInformation.minY()); AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0);
AffineTransform mirrorMatrix = new AffineTransform(1, 0, 0, -1, 0, pageInformation.height()); AffineTransform mirrorMatrix = new AffineTransform(1, 0, 0, -1, 0, pageInformation.height());

View File

@ -3,19 +3,23 @@ package com.knecon.fforesight.service.ocr.processor.service;
import java.awt.Color; import java.awt.Color;
import java.awt.geom.Line2D; import java.awt.geom.Line2D;
import java.awt.geom.Point2D; import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.iqser.red.pdftronlogic.commons.Converter;
import com.knecon.fforesight.service.ocr.processor.model.OcrResultToWrite; import com.knecon.fforesight.service.ocr.processor.model.OcrResultToWrite;
import com.knecon.fforesight.service.ocr.processor.model.QuadPoint; import com.knecon.fforesight.service.ocr.processor.model.QuadPoint;
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage; import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
@ -26,6 +30,9 @@ import com.knecon.fforesight.service.viewerdoc.model.PlacedText;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations; import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage; import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage;
import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService; import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.TextExtractor;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
@ -39,96 +46,161 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class OcrResultWriter { public class OcrResultWriter {
public static final Color REGULAR_TEXT_COLOR = Color.BLUE;
public static final Color BOLD_TEXT_COLOR = Color.CYAN;
public static final Color REGULAR_TEXT_IN_IGNORE_ZONE = Color.RED;
public static final Color BOLD_TEXT_IN_IGNORE_ZONE = Color.RED;
ViewerDocumentService viewerDocumentService; ViewerDocumentService viewerDocumentService;
@SneakyThrows @SneakyThrows
public void drawOcrResultsToPdf(File document, File viewerDocument, Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) { public void drawOcrResultsToPdf(File document, File viewerDocument, Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) {
Map<Integer, VisualizationsOnPage> ocrVisualizationsOnPages = createVisualizations(imagesWithResultsPerPage); Map<Integer, VisualizationsOnPage> ocrVisualizationsOnPages = new HashMap<>();
Map<Integer, VisualizationsOnPage> ocrTextDebugVisualizationsOnPages = createDebugTextVisualizations(imagesWithResultsPerPage); Map<Integer, VisualizationsOnPage> ocrTextDebugVisualizationsOnPages = new HashMap<>();
Map<Integer, VisualizationsOnPage> ocrBBoxDebugVisualizationsOnPages = createDebugBBoxVisualizations(imagesWithResultsPerPage); Map<Integer, VisualizationsOnPage> ocrBBoxDebugVisualizationsOnPages = new HashMap<>();
try (var in = new FileInputStream(document); PDFDoc doc = new PDFDoc(in)) {
for (Integer pageNumber : imagesWithResultsPerPage.keySet()) {
List<Rectangle2D> textBBoxes = getTextBBoxes(doc.getPage(pageNumber));
ocrVisualizationsOnPages.put(pageNumber - 1, createVisualizations(imagesWithResultsPerPage.get(pageNumber), textBBoxes));
ocrTextDebugVisualizationsOnPages.put(pageNumber - 1, createDebugTextVisualizations(imagesWithResultsPerPage.get(pageNumber), textBBoxes));
ocrBBoxDebugVisualizationsOnPages.put(pageNumber - 1, createDebugBBoxVisualizations(imagesWithResultsPerPage.get(pageNumber)));
}
}
Visualizations visualizations = new Visualizations(ContentStreams.KNECON_OCR, ocrVisualizationsOnPages, false); Visualizations visualizations = new Visualizations(ContentStreams.KNECON_OCR, ocrVisualizationsOnPages, false);
List<Visualizations> debugVisualizations = List.of(visualizations, List<Visualizations> debugVisualizations = List.of(visualizations,
new Visualizations(ContentStreams.KNECON_OCR_TEXT_DEBUG, ocrTextDebugVisualizationsOnPages, false), new Visualizations(ContentStreams.KNECON_OCR_TEXT_DEBUG, ocrTextDebugVisualizationsOnPages, false),
new Visualizations(ContentStreams.KNECON_OCR_BBOX_DEBUG, ocrBBoxDebugVisualizationsOnPages, false)); new Visualizations(ContentStreams.KNECON_OCR_BBOX_DEBUG, ocrBBoxDebugVisualizationsOnPages, false));
viewerDocumentService.addVisualizationsOnPage(document, document, visualizations); viewerDocumentService.addVisualizationsOnPage(document, document, List.of(visualizations));
viewerDocumentService.addVisualizationsOnPage(viewerDocument, viewerDocument, debugVisualizations); viewerDocumentService.addVisualizationsOnPage(viewerDocument, viewerDocument, debugVisualizations);
} }
private Map<Integer, VisualizationsOnPage> createVisualizations(Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) { @SuppressWarnings("PMD")
private List<Rectangle2D> getTextBBoxes(Page page) {
return imagesWithResultsPerPage.keySet() List<Rectangle2D> textBBoxes = new ArrayList<>();
.stream() try (var textExtractor = new TextExtractor()) {
.collect(Collectors.toMap(pageNumber1IdxTo0IdxMapper(), pageNumber -> createVisualizations(imagesWithResultsPerPage.get(pageNumber)))); textExtractor.begin(page);
try {
for (TextExtractor.Line line = textExtractor.getFirstLine(); line.isValid(); line = getNextLine(line)) {
for (TextExtractor.Word word = line.getFirstWord(); word.isValid(); word = getNextWord(word)) {
textBBoxes.add(Converter.toRectangle2D(word.getBBox()));
}
}
} catch (Exception e) {
log.warn("Could not get word dimension, {}", e.getMessage());
}
return textBBoxes;
}
} }
private static Function<Integer, Integer> pageNumber1IdxTo0IdxMapper() { private static TextExtractor.Word getNextWord(TextExtractor.Word word) {
// PDFBox uses a 0-based index for page numbers internally, while we use a 1-based index
return p -> p - 1; TextExtractor.Word nextWord = word.getNextWord();
word.close();
return nextWord;
} }
private VisualizationsOnPage createVisualizations(List<OcrResultToWrite> ocrResultsToWrite) { private static TextExtractor.Line getNextLine(TextExtractor.Line line) {
TextExtractor.Line newLine = line.getNextLine();
line.close();
return newLine;
}
private VisualizationsOnPage createVisualizations(List<OcrResultToWrite> ocrResultsToWrite, List<Rectangle2D> ignoreZones) {
List<TextPositionInImage> words = ocrResultsToWrite.stream()
.map(OcrResultToWrite::textPositionInImage)
.flatMap(Collection::stream)
.filter(word -> ignoreZones.stream()
.noneMatch(ignoreZone -> word.getTransformedTextBBox().getBounds2D().intersects(ignoreZone)))
.toList();
List<TextPositionInImage> words = ocrResultsToWrite.stream().map(OcrResultToWrite::textPositionInImage).flatMap(Collection::stream).toList();
List<PlacedText> placedTexts = words.stream() List<PlacedText> placedTexts = words.stream()
.map(word -> new PlacedText(word.getText(), .map(word -> new PlacedText(word.getText(),
null, null,
Color.BLACK, Color.BLACK,
(float) word.getFontSize(), (float) word.getFontSize(),
word.getFontMetricsFactory(), word.getFontMetricsFactory(),
Optional.of(word.getTextMatrix()), Optional.of(word.getTextMatrix()),
Optional.of(RenderingMode.NEITHER))) Optional.of(RenderingMode.NEITHER)))
.toList(); .toList();
return VisualizationsOnPage.builder().placedTexts(placedTexts).build(); return VisualizationsOnPage.builder().placedTexts(placedTexts).build();
} }
private Map<Integer, VisualizationsOnPage> createDebugTextVisualizations(Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) { private VisualizationsOnPage createDebugTextVisualizations(List<OcrResultToWrite> ocrResultsToWrite, List<Rectangle2D> textBBoxes) {
return imagesWithResultsPerPage.keySet() List<TextPositionInImage> wordsToDraw = new ArrayList<>();
.stream() List<TextPositionInImage> ignoredWords = new ArrayList<>();
.collect(Collectors.toMap(pageNumber1IdxTo0IdxMapper(), pageNumber -> createDebugTextVisualizations(imagesWithResultsPerPage.get(pageNumber))));
}
for (OcrResultToWrite ocrResultToWrite : ocrResultsToWrite) {
private VisualizationsOnPage createDebugTextVisualizations(List<OcrResultToWrite> ocrResultsToWrite) { for (TextPositionInImage textPositionInImage : ocrResultToWrite.textPositionInImage()) {
if (textBBoxes.stream()
List<TextPositionInImage> words = ocrResultsToWrite.stream().map(OcrResultToWrite::textPositionInImage).flatMap(Collection::stream).toList(); .anyMatch(ignoreZone -> textPositionInImage.getTransformedTextBBox().getBounds2D().intersects(ignoreZone))) {
List<PlacedText> placedTexts = words.stream() ignoredWords.add(textPositionInImage);
} else {
wordsToDraw.add(textPositionInImage);
}
}
}
Stream<PlacedText> placedTexts = wordsToDraw.stream()
.map(word -> new PlacedText(word.getText(), .map(word -> new PlacedText(word.getText(),
null, null,
word.getFontStyle().equals(FontStyle.REGULAR) ? Color.BLUE : Color.RED, word.getFontStyle().equals(FontStyle.REGULAR) ? REGULAR_TEXT_COLOR : BOLD_TEXT_COLOR,
(float) word.getFontSize(), (float) word.getFontSize(),
word.getFontMetricsFactory(), word.getFontMetricsFactory(),
Optional.of(word.getTextMatrix()), Optional.of(word.getTextMatrix()),
Optional.of(RenderingMode.FILL))) Optional.of(RenderingMode.FILL)));
.toList();
return VisualizationsOnPage.builder().placedTexts(placedTexts).build();
}
Stream<PlacedText> placedTexts2 = ignoredWords.stream()
.map(word -> new PlacedText(word.getText(),
null,
word.getFontStyle().equals(FontStyle.REGULAR) ? REGULAR_TEXT_IN_IGNORE_ZONE : BOLD_TEXT_IN_IGNORE_ZONE,
(float) word.getFontSize(),
word.getFontMetricsFactory(),
Optional.of(word.getTextMatrix()),
Optional.of(RenderingMode.FILL)));
private Map<Integer, VisualizationsOnPage> createDebugBBoxVisualizations(Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) { return VisualizationsOnPage.builder()
.placedTexts(Stream.of(placedTexts, placedTexts2)
return imagesWithResultsPerPage.keySet() .flatMap(Function.identity())
.stream() .toList())
.collect(Collectors.toMap(pageNumber1IdxTo0IdxMapper(), pageNumber -> createDebugBBoxVisualizations(imagesWithResultsPerPage.get(pageNumber)))); .build();
} }
private VisualizationsOnPage createDebugBBoxVisualizations(List<OcrResultToWrite> ocrResultsToWrite) { private VisualizationsOnPage createDebugBBoxVisualizations(List<OcrResultToWrite> ocrResultsToWrite) {
List<TextPositionInImage> words = ocrResultsToWrite.stream().map(OcrResultToWrite::textPositionInImage).flatMap(Collection::stream).toList(); List<TextPositionInImage> words = ocrResultsToWrite.stream()
.map(OcrResultToWrite::textPositionInImage)
.flatMap(Collection::stream)
.toList();
List<ColoredLine> coloredLines = Stream.concat(// List<ColoredLine> coloredLines = Stream.concat(//
words.stream().map(TextPositionInImage::getTransformedTextBBox).map(this::quadPointAsLines),// words.stream()
ocrResultsToWrite.stream().map(OcrResultToWrite::imageBoundingBox).map(this::createGrid)// .map(TextPositionInImage::getTransformedTextBBox)
).flatMap(Collection::stream).toList(); .map(this::quadPointAsLines),//
ocrResultsToWrite.stream()
.map(OcrResultToWrite::imageBoundingBox)
.map(this::createGrid)//
)
.flatMap(Collection::stream)
.toList();
return VisualizationsOnPage.builder().coloredLines(coloredLines).build(); return VisualizationsOnPage.builder().coloredLines(coloredLines).build();
} }
@ -136,9 +208,9 @@ public class OcrResultWriter {
private List<ColoredLine> quadPointAsLines(QuadPoint rect) { private List<ColoredLine> quadPointAsLines(QuadPoint rect) {
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.ORANGE, 1), return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.ORANGE, 1),
new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1), new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1),
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1), new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1)); new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1));
} }

View File

@ -227,7 +227,7 @@ public class ImageProcessingThread extends Thread {
if (pix.w < 100 || pix.h < 100) { if (pix.w < 100 || pix.h < 100) {
binarized = Leptonica1.pixThresholdToBinary(gaussian, 170); binarized = Leptonica1.pixThresholdToBinary(gaussian, 170);
} else { } else {
binarized = Leptonica1.pixOtsuThreshOnBackgroundNorm(gaussian, null, 50, 50, 165, 10, 100, 5, 5, 0.2f, null); binarized = Leptonica1.pixOtsuThreshOnBackgroundNorm(gaussian, null, 50, 50, 165, 10, 100, 5, 5, 0.1f, null);
if (binarized == null) { // Sometimes Otsu just fails, then we binarize directly if (binarized == null) { // Sometimes Otsu just fails, then we binarize directly
binarized = Leptonica1.pixThresholdToBinary(gaussian, 170); binarized = Leptonica1.pixThresholdToBinary(gaussian, 170);
} }

View File

@ -23,7 +23,7 @@ dependencies {
implementation(project(":ocr-service-processor")) implementation(project(":ocr-service-processor"))
implementation(project(":ocr-service-api")) implementation(project(":ocr-service-api"))
implementation("com.knecon.fforesight:tracing-commons:0.5.0") implementation("com.knecon.fforesight:tracing-commons:0.7.0")
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.1.1") implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.1.1")
implementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}") implementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}")
@ -39,7 +39,7 @@ tasks.named<BootBuildImage>("bootBuildImage") {
environment.put("BPE_DELIM_JAVA_TOOL_OPTIONS", " ") environment.put("BPE_DELIM_JAVA_TOOL_OPTIONS", " ")
environment.put("BPE_APPEND_JAVA_TOOL_OPTIONS", "-Dfile.encoding=UTF-8") environment.put("BPE_APPEND_JAVA_TOOL_OPTIONS", "-Dfile.encoding=UTF-8")
environment.put("BPE_GS_LIB", "/layers/fagiani_apt/apt/usr/share/ghostscript/9.26/Resource/Init/") // set ghostscript lib path environment.put("BPE_GS_LIB", "/layers/fagiani_apt/apt/usr/share/ghostscript/9.55.0/Resource/Init/") // set ghostscript lib path, version in path must match version in Aptfile
environment.put("BPE_FONTCONFIG_PATH", "/layers/fagiani_apt/apt/etc/fonts/") // set ghostscript fontconfig path environment.put("BPE_FONTCONFIG_PATH", "/layers/fagiani_apt/apt/etc/fonts/") // set ghostscript fontconfig path
var aptfile = layout.projectDirectory.file("src/main/resources/Aptfile").toString() var aptfile = layout.projectDirectory.file("src/main/resources/Aptfile").toString()

View File

@ -6,23 +6,27 @@ import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration; import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.EnableAspectJAutoProxy;
import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Import;
import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.scheduling.annotation.EnableAsync;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService; import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService; import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
import com.knecon.fforesight.lifecyclecommons.LifecycleAutoconfiguration;
import com.knecon.fforesight.service.ocr.processor.OcrServiceProcessorConfiguration; import com.knecon.fforesight.service.ocr.processor.OcrServiceProcessorConfiguration;
import com.knecon.fforesight.service.ocr.v1.server.queue.MessagingConfiguration; import com.knecon.fforesight.service.ocr.v1.server.queue.MessagingConfiguration;
import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration; import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
import com.knecon.fforesight.tracing.OpenTelemetryConfig;
import io.micrometer.core.aop.TimedAspect; import io.micrometer.core.aop.TimedAspect;
import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.MeterRegistry;
@EnableAsync @EnableAsync
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class}) @ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, LifecycleAutoconfiguration.class})
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class}) @SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
@Import({MessagingConfiguration.class, StorageAutoConfiguration.class, OcrServiceProcessorConfiguration.class}) @Import({MessagingConfiguration.class, StorageAutoConfiguration.class, OcrServiceProcessorConfiguration.class, OpenTelemetryConfig.class})
@EnableAspectJAutoProxy
public class Application { public class Application {
/** /**

View File

@ -1,5 +1,5 @@
# you can list packages # you can list packages
ghostscript ghostscript=9.55.0~dfsg1-0ubuntu5.9
pkg-config pkg-config
zip zip
unzip unzip
@ -11,6 +11,7 @@ libk5crypto3
libkrb5support0 libkrb5support0
libkeyutils1 libkeyutils1
libkrb5-3 libkrb5-3
libbrotli1
# or include links to specific .deb files # or include links to specific .deb files
# http://ftp.debian.org/debian/pool/contrib/m/msttcorefonts/ttf-mscorefonts-installer_3.8_all.deb # http://ftp.debian.org/debian/pool/contrib/m/msttcorefonts/ttf-mscorefonts-installer_3.8_all.deb

View File

@ -12,6 +12,9 @@ project.version: 1.0-SNAPSHOT
server: server:
port: 8080 port: 8080
lifecycle:
base-package: com.knecon.fforesight.service.ocr
spring: spring:
application: application:
name: ocr-service name: ocr-service

View File

@ -24,10 +24,10 @@ import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension; import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.knecon.fforesight.service.ocr.processor.initializer.PDFNetInitializer;
import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService; import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService; import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
import com.knecon.fforesight.tenantcommons.TenantsClient; import com.knecon.fforesight.tenantcommons.TenantsClient;
import com.pdftron.pdf.PDFNet; import com.pdftron.pdf.PDFNet;
@ -36,7 +36,7 @@ import lombok.SneakyThrows;
@ExtendWith({SpringExtension.class, MockitoExtension.class}) @ExtendWith({SpringExtension.class, MockitoExtension.class})
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import({AbstractTest.TestConfiguration.class, PDFNetInitializer.class}) @Import({AbstractTest.TestConfiguration.class, NativeLibrariesInitializer.class})
@AutoConfigureObservability @AutoConfigureObservability
public class AbstractTest { public class AbstractTest {

View File

@ -15,3 +15,10 @@ management:
health.enabled: true health.enabled: true
endpoints.web.exposure.include: prometheus, health, metrics endpoints.web.exposure.include: prometheus, health, metrics
metrics.export.prometheus.enabled: true metrics.export.prometheus.enabled: true
tracing:
enabled: ${TRACING_ENABLED:false}
sampling:
probability: ${TRACING_PROBABILITY:1.0}
otlp:
tracing:
endpoint: ${OTLP_ENDPOINT:http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces}

View File

@ -1,5 +1,9 @@
#!/bin/bash #!/bin/bash
set -e
dir=${PWD##*/} dir=${PWD##*/}
gradle assemble gradle assemble
# Get the current Git branch # Get the current Git branch
@ -11,5 +15,32 @@ commit_hash=$(git rev-parse --short=5 HEAD)
# Combine branch and commit hash # Combine branch and commit hash
buildName="${USER}-${branch}-${commit_hash}" buildName="${USER}-${branch}-${commit_hash}"
gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=${buildName}
echo "nexus.knecon.com:5001/ff/${dir}-server:$buildName"
newImageName="nexus.knecon.com:5001/ff/ocr-service-server:$buildName"
echo "full image name:"
echo ${newImageName}
echo ""
# Optional deployment step: when no namespace argument is given, stop after the build/publish.
if [ -z "$1" ]; then
    exit 0
fi

namespace="${1}"
deployment_name="ocr-service-v1"

echo "deploying to ${namespace}"

# Image(s) currently configured on the deployment's containers.
# All expansions are quoted so a namespace or image name containing whitespace or glob
# characters is passed to kubectl as a single argument instead of being word-split.
oldImageName=$(rancher kubectl -n "${namespace}" get deployment "${deployment_name}" -o=jsonpath='{.spec.template.spec.containers[*].image}')

if [ "${newImageName}" = "${oldImageName}" ]; then
    # Same tag as before: a restart is requested instead of an image change.
    echo "Image tag did not change, redeploying..."
    rancher kubectl rollout restart deployment "${deployment_name}" -n "${namespace}"
else
    echo "upgrading the image tag..."
    rancher kubectl set image "deployment/${deployment_name}" "${deployment_name}=${newImageName}" -n "${namespace}"
fi

# Wait for the rollout to finish before reporting the result.
rancher kubectl rollout status deployment "${deployment_name}" -n "${namespace}"
echo "Built ${deployment_name}:${buildName} and deployed to ${namespace}"