Compare commits

...

10 Commits

Author SHA1 Message Date
Kilian Schüttler
23e63db6c5 Merge branch 'RED-8670' into 'main'
RED-8670: add features to status update

See merge request fforesight/azure-ocr-service!23
2025-01-09 11:27:33 +01:00
Kilian Schüttler
635fd4abf8 RED-8670: add features to status update 2025-01-09 11:27:33 +01:00
Kilian Schuettler
98123a5938 RED-8670: add features to status update 2024-12-17 12:33:32 +01:00
Kilian Schuettler
9bbeaf2335 RED-8670: add features to status update 2024-12-17 12:32:34 +01:00
Kilian Schuettler
b6666f6953 RED-8670: add features to status update 2024-12-17 12:30:26 +01:00
Kilian Schuettler
80dfa16103 RED-8670: add features to status update 2024-12-17 12:20:19 +01:00
Kilian Schüttler
76c8e98384 Merge branch 'RED-8670' into 'main'
RED-8670: write IDP results to file

See merge request fforesight/azure-ocr-service!22
2024-12-06 11:02:03 +01:00
Kilian Schüttler
df154cfe9c RED-8670: write IDP results to file 2024-12-06 11:02:03 +01:00
Kilian Schüttler
e7b61353bf Merge branch 'serverbuild-test' into 'main'
fix ghostscript

See merge request fforesight/azure-ocr-service!21
2024-11-27 10:53:05 +01:00
Kilian Schüttler
d63562ad24 fix ghostscript apt package 2024-11-27 10:53:05 +01:00
29 changed files with 474 additions and 152 deletions

View File

@ -1,5 +1,8 @@
package com.knecon.fforesight.service.ocr.v1.api.model; package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.Collections;
import java.util.Set;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
import lombok.Data; import lombok.Data;
@ -12,9 +15,16 @@ import lombok.NoArgsConstructor;
public class OCRStatusUpdateResponse { public class OCRStatusUpdateResponse {
private String fileId; private String fileId;
private Set<AzureOcrFeature> features;
private int numberOfPagesToOCR; private int numberOfPagesToOCR;
private int numberOfOCRedPages; private int numberOfOCRedPages;
private boolean ocrFinished; private boolean ocrFinished;
private boolean ocrStarted; private boolean ocrStarted;
/**
 * Returns the OCR features attached to this status update.
 *
 * @return the stored feature set, or an empty set when no features were set; never {@code null}
 */
public Set<AzureOcrFeature> getFeatures() {
    if (features != null) {
        return features;
    }
    return Collections.emptySet();
}
} }

View File

@ -5,9 +5,12 @@ import java.awt.geom.Line2D;
import java.awt.geom.Point2D; import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D; import java.awt.geom.Rectangle2D;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.stream.Stream; import java.util.stream.Stream;
public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) { import lombok.Getter;
public final class QuadPoint {
public enum Direction { public enum Direction {
RIGHT, RIGHT,
@ -41,7 +44,38 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
* ?|_____|? * ?|_____|?
*/ */
} }
private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold
private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold.
private final Point2D a;
private final Point2D b;
private final Point2D c;
private final Point2D d;
@Getter
private final Direction direction;
/**
 * Creates a QuadPoint from its four corner points and derives its {@link Direction} once, at construction time.
 * The points are assumed to form a convex polygon; this is deliberately not asserted, for performance reasons.
 *
 * @param a first corner
 * @param b second corner
 * @param c third corner
 * @param d fourth corner
 */
public QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
this.a = a;
this.b = b;
this.c = c;
this.d = d;
// must stay last: calculateDirection() reads the corner fields assigned above
this.direction = calculateDirection();
}
/**
 * Derives the direction of this QuadPoint from the relative position of the corners a and d.
 * The horizontal check takes precedence over the vertical one; if neither applies the result is {@link Direction#NONE}.
 *
 * @return RIGHT/LEFT for a horizontal quad, UP/DOWN for a vertical quad, NONE otherwise
 */
private Direction calculateDirection() {
    if (isHorizontal()) {
        if (a.getX() < d.getX()) {
            return Direction.RIGHT;
        }
        return Direction.LEFT;
    }
    if (!isVertical()) {
        return Direction.NONE;
    }
    if (a.getY() < d.getY()) {
        return Direction.UP;
    }
    return Direction.DOWN;
}
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) { public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) {
@ -69,7 +103,9 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
public static QuadPoint fromPolygons(List<Double> polygon) { public static QuadPoint fromPolygons(List<Double> polygon) {
assert polygon.size() == 8; if (polygon.size() != 8) {
throw new AssertionError();
}
return new QuadPoint(new Point2D.Double(polygon.get(0), polygon.get(1)), return new QuadPoint(new Point2D.Double(polygon.get(0), polygon.get(1)),
new Point2D.Double(polygon.get(6), polygon.get(7)), new Point2D.Double(polygon.get(6), polygon.get(7)),
new Point2D.Double(polygon.get(4), polygon.get(5)), new Point2D.Double(polygon.get(4), polygon.get(5)),
@ -115,18 +151,6 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
} }
public Direction getDirection() {
if (isHorizontal()) {
return a.getX() < d.getX() ? Direction.RIGHT : Direction.LEFT;
}
if (isVertical()) {
return a.getY() < d.getY() ? Direction.UP : Direction.DOWN;
}
return Direction.NONE;
}
public Stream<Line2D> asLines() { public Stream<Line2D> asLines() {
return Stream.of(new Line2D.Double(a(), b()), new Line2D.Double(b(), c()), new Line2D.Double(c(), d()), new Line2D.Double(d(), a())); return Stream.of(new Line2D.Double(a(), b()), new Line2D.Double(b(), c()), new Line2D.Double(c(), d()), new Line2D.Double(d(), a()));
@ -146,6 +170,142 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
} }
/**
 * Checks whether the point (x, y) lies within this QuadPoint.
 * The quad is cut along the diagonal a-c into the triangles a-b-c and a-c-d; the point is contained if either triangle contains it.
 * This relies on the QuadPoint being convex and its corners being supplied in the correct order.
 *
 * @param x x coordinate of the point to test
 * @param y y coordinate of the point to test
 * @return true if one of the two triangles contains the point
 */
public boolean contains(double x, double y) {
    if (triangleContains(a, b, c, x, y)) {
        return true;
    }
    return triangleContains(a, c, d, x, y);
}
/*
Tests whether the point (x, y) lies inside, or on the border of, the triangle a-b-c.
The point is converted to barycentric coordinates using Cramer's rule; it is contained when both computed weights are non-negative and together do not exceed one.
For a degenerate triangle (collinear corners) the determinant is zero, the weights become infinite or NaN, and the comparisons below yield false for finite inputs.
https://en.wikipedia.org/wiki/Barycentric_coordinate_system#Barycentric_coordinates_on_triangles
 */
private boolean triangleContains(Point2D a, Point2D b, Point2D c, double x, double y) {
    // coordinate differences shared by the determinant and both weights
    double bcY = b.getY() - c.getY();
    double cbX = c.getX() - b.getX();
    double acX = a.getX() - c.getX();
    double acY = a.getY() - c.getY();
    double caY = c.getY() - a.getY();
    double pcX = x - c.getX();
    double pcY = y - c.getY();

    // determinant of the linear system; its magnitude is twice the triangle's area
    double determinant = (bcY * acX + cbX * acY);
    double scale = 1.0 / determinant;

    double alpha = (bcY * pcX + cbX * pcY) * scale;
    double beta = (caY * pcX + acX * pcY) * scale;

    // the third weight is 1 - alpha - beta, hence the upper bound on the sum
    return alpha >= 0 && beta >= 0 && alpha + beta <= 1;
}
/**
 * Checks whether the given point lies within this QuadPoint.
 *
 * @param p the point to test
 * @return true if the point's coordinates are contained, see {@link #contains(double, double)}
 */
public boolean contains(Point2D p) {
    double px = p.getX();
    double py = p.getY();
    return contains(px, py);
}
/**
 * Checks whether the given rectangle lies within this QuadPoint by testing each of its four corners.
 *
 * @param r the rectangle to test
 * @return true only if all four corners of the rectangle are contained
 */
public boolean contains(Rectangle2D r) {
    double minX = r.getX();
    double minY = r.getY();
    double maxY = r.getMaxY();
    double maxX = r.getMaxX();

    // corners are visited in the order (minX,minY), (minX,maxY), (maxX,maxY), (maxX,minY); the first miss ends the check
    if (!contains(minX, minY)) {
        return false;
    }
    if (!contains(minX, maxY)) {
        return false;
    }
    if (!contains(maxX, maxY)) {
        return false;
    }
    return contains(maxX, minY);
}
/**
 * @return the arithmetic mean of the x coordinates of the four corners
 */
public double getCenterX() {
    double sumX = a.getX() + b.getX() + c.getX() + d.getX();
    return sumX / 4;
}
/**
 * @return the arithmetic mean of the y coordinates of the four corners
 */
public double getCenterY() {
    double sumY = a.getY() + b.getY() + c.getY() + d.getY();
    return sumY / 4;
}
/**
 * @return a new point built from {@link #getCenterX()} and {@link #getCenterY()}
 */
public Point2D getCenter() {
    double centerX = getCenterX();
    double centerY = getCenterY();
    return new Point2D.Double(centerX, centerY);
}
/**
 * Checks whether the given line touches this QuadPoint.
 *
 * @param line the line segment to test
 * @return true if either end point of the line is contained in this QuadPoint, or the line intersects one of its four edges
 */
public boolean intersects(Line2D line) {
    // cheap end point checks first; the edge test only runs if both end points are outside
    if (contains(line.getP1())) {
        return true;
    }
    if (contains(line.getP2())) {
        return true;
    }
    return asLines().anyMatch(edge -> edge.intersectsLine(line));
}
/**
 * @return a new line running from the top-right corner to the lower-right corner
 */
public Line2D getRightLine() {
    Point2D start = getTopRight();
    Point2D end = getLowerRight();
    return new Line2D.Double(start, end);
}
/**
 * @return a new line running from the top-left corner to the lower-left corner
 */
public Line2D getLeftLine() {
    Point2D start = getTopLeft();
    Point2D end = getLowerLeft();
    return new Line2D.Double(start, end);
}
/**
 * @return a new line running from the lower-left corner to the lower-right corner
 */
public Line2D getBottomLine() {
    Point2D start = getLowerLeft();
    Point2D end = getLowerRight();
    return new Line2D.Double(start, end);
}
/**
 * @return a new line running from the top-left corner to the top-right corner
 */
public Line2D getTopLine() {
    Point2D start = getTopLeft();
    Point2D end = getTopRight();
    return new Line2D.Double(start, end);
}
/**
 * Selects the corner that counts as top-left for the direction computed at construction.
 *
 * @return a for DOWN, d for LEFT, c for UP, b for every other direction
 */
public Point2D getTopLeft() {
    switch (direction) {
        case DOWN:
            return a;
        case LEFT:
            return d;
        case UP:
            return c;
        default:
            return b;
    }
}
/**
 * Selects the corner that counts as top-right for the direction computed at construction.
 *
 * @return b for DOWN, a for LEFT, d for UP, c for every other direction
 */
public Point2D getTopRight() {
    switch (direction) {
        case DOWN:
            return b;
        case LEFT:
            return a;
        case UP:
            return d;
        default:
            return c;
    }
}
/**
 * Selects the corner that counts as lower-right for the direction computed at construction.
 *
 * @return c for DOWN, b for LEFT, a for UP, d for every other direction
 */
public Point2D getLowerRight() {
    switch (direction) {
        case DOWN:
            return c;
        case LEFT:
            return b;
        case UP:
            return a;
        default:
            return d;
    }
}
/**
 * Selects the corner that counts as lower-left for the direction computed at construction.
 *
 * @return d for DOWN, c for LEFT, b for UP, a for every other direction
 */
public Point2D getLowerLeft() {
    switch (direction) {
        case DOWN:
            return d;
        case LEFT:
            return c;
        case UP:
            return b;
        default:
            return a;
    }
}
/** /**
* Determines if the given QuadPoint aligns with this QuadPoint within a given threshold. * Determines if the given QuadPoint aligns with this QuadPoint within a given threshold.
* It does so by trying every possible combination of aligning sides. It starts with the most likely combination of ab and cd. * It does so by trying every possible combination of aligning sides. It starts with the most likely combination of ab and cd.
@ -224,4 +384,37 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
return Math.atan2(deltaY, deltaX); return Math.atan2(deltaY, deltaX);
} }
/**
 * @return the corner point a
 */
public Point2D a() {
    return a;
}

/**
 * @return the corner point b
 */
public Point2D b() {
    return b;
}

/**
 * @return the corner point c
 */
public Point2D c() {
    return c;
}

/**
 * @return the corner point d
 */
public Point2D d() {
    return d;
}
/**
 * Two QuadPoints are equal when they are of the same class and all four corner points are pairwise equal.
 * The direction is not compared; it is computed from the corners in the constructor.
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null) {
        return false;
    }
    if (getClass() != obj.getClass()) {
        return false;
    }
    QuadPoint other = (QuadPoint) obj;
    boolean sameA = Objects.equals(a, other.a);
    boolean sameB = Objects.equals(b, other.b);
    boolean sameC = Objects.equals(c, other.c);
    boolean sameD = Objects.equals(d, other.d);
    return sameA && sameB && sameC && sameD;
}
/**
 * Combines the hash codes of the four corner points, in the order a, b, c, d, with the usual 31-multiplier fold.
 * Produces the same value as {@code Objects.hash(a, b, c, d)}.
 */
@Override
public int hashCode() {
    int result = 1;
    result = 31 * result + Objects.hashCode(a);
    result = 31 * result + Objects.hashCode(b);
    result = 31 * result + Objects.hashCode(c);
    result = 31 * result + Objects.hashCode(d);
    return result;
}
} }

View File

@ -5,4 +5,9 @@ import lombok.Builder;
@Builder @Builder
public record QuadPointData(float[] values) { public record QuadPointData(float[] values) {
/**
 * Converts this data holder into a {@link QuadPoint} by delegating to {@code QuadPoint.fromData(this)}.
 *
 * @return the QuadPoint built from this record
 */
public QuadPoint get() {
return QuadPoint.fromData(this);
}
} }

View File

@ -19,7 +19,7 @@ dependencies {
implementation("com.amazonaws:aws-java-sdk-kms:1.12.440") implementation("com.amazonaws:aws-java-sdk-kms:1.12.440")
implementation("com.google.guava:guava:31.1-jre") implementation("com.google.guava:guava:31.1-jre")
implementation("com.knecon.fforesight:viewer-doc-processor:0.193.0") implementation("com.knecon.fforesight:viewer-doc-processor:0.193.0")
implementation("com.azure:azure-ai-documentintelligence:1.0.0-beta.4") implementation("com.azure:azure-ai-documentintelligence:1.0.0")
implementation("com.iqser.red.commons:pdftron-logic-commons:2.32.0") implementation("com.iqser.red.commons:pdftron-logic-commons:2.32.0")

View File

@ -19,7 +19,7 @@ public class OcrServiceSettings {
boolean debug; // writes the ocr layer visibly to the viewer doc pdf boolean debug; // writes the ocr layer visibly to the viewer doc pdf
boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines. boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
boolean snuggify = true; // attempts to shrink the word boxes returned by azure to fit the actual word pixels snug boolean snuggify = true; // attempts to shrink the word boxes returned by azure to fit the actual word pixels snug
boolean useCaches = true; // skips azure api, pdf rendering and image processing, when the files are already present boolean useCaches; // skips azure api, pdf rendering and image processing, when the files are already present
boolean azureFontStyleDetection; // omits all image processing and uses azures FONT_STYLE feature (costs 0.6ct per page) boolean azureFontStyleDetection; // omits all image processing and uses azures FONT_STYLE feature (costs 0.6ct per page)
String contentFormat; // Either markdown or text. But, for whatever reason, with markdown enabled, key-values are not written by azure.... String contentFormat; // Either markdown or text. But, for whatever reason, with markdown enabled, key-values are not written by azure....

View File

@ -13,7 +13,7 @@ import com.pdftron.pdf.Rect;
import lombok.SneakyThrows; import lombok.SneakyThrows;
public record PageInformation(Rectangle2D mediabox, int number, int rotationDegrees, List<Rectangle2D> wordBBoxes) { public record PageInformation(Rectangle2D mediabox, Rectangle2D cropBox, int number, int rotationDegrees, List<Rectangle2D> wordBBoxes) {
@SneakyThrows @SneakyThrows
public static Map<Integer, PageInformation> fromPDFDoc(PDFDoc pdfDoc) { public static Map<Integer, PageInformation> fromPDFDoc(PDFDoc pdfDoc) {
@ -34,8 +34,9 @@ public record PageInformation(Rectangle2D mediabox, int number, int rotationDegr
@SneakyThrows @SneakyThrows
public static PageInformation fromPage(int pageNum, Page page) { public static PageInformation fromPage(int pageNum, Page page) {
try (Rect mediaBox = page.getCropBox()) { try (Rect mediaBox = page.getCropBox(); Rect cropBox = page.getCropBox()) {
return new PageInformation(new Rectangle2D.Double(mediaBox.getX1(), mediaBox.getY1(), mediaBox.getWidth(), mediaBox.getHeight()), return new PageInformation(new Rectangle2D.Double(mediaBox.getX1(), mediaBox.getY1(), mediaBox.getWidth(), mediaBox.getHeight()),
new Rectangle2D.Double(cropBox.getX1(), cropBox.getY1(), cropBox.getWidth(), cropBox.getHeight()),
pageNum, pageNum,
page.getRotation() * 90, page.getRotation() * 90,
DocumentTextExtractor.getTextBBoxes(page)); DocumentTextExtractor.getTextBBoxes(page));

View File

@ -123,7 +123,7 @@ public class AsyncOcrService {
private static void handleCompleted(BatchContext batchContext) { private static void handleCompleted(BatchContext batchContext) {
log.info("Completed batch {} with pages {}", batchContext.batch.getIndex(), batchContext.batch); log.info("Batch {}: Completed with pages {}", batchContext.batch.getIndex(), batchContext.batch);
} }

View File

@ -1,21 +1,19 @@
package com.knecon.fforesight.service.ocr.processor.service; package com.knecon.fforesight.service.ocr.processor.service;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.yaml.snakeyaml.events.Event;
import com.azure.ai.documentintelligence.DocumentIntelligenceAsyncClient; import com.azure.ai.documentintelligence.DocumentIntelligenceAsyncClient;
import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder; import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
import com.azure.ai.documentintelligence.models.AnalyzeDocumentRequest; import com.azure.ai.documentintelligence.models.AnalyzeDocumentOptions;
import com.azure.ai.documentintelligence.models.AnalyzeOperationDetails;
import com.azure.ai.documentintelligence.models.AnalyzeResult; import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.ai.documentintelligence.models.AnalyzeResultOperation;
import com.azure.ai.documentintelligence.models.ContentFormat;
import com.azure.ai.documentintelligence.models.DocumentAnalysisFeature; import com.azure.ai.documentintelligence.models.DocumentAnalysisFeature;
import com.azure.ai.documentintelligence.models.DocumentContentFormat;
import com.azure.ai.documentintelligence.models.StringIndexType; import com.azure.ai.documentintelligence.models.StringIndexType;
import com.azure.core.credential.AzureKeyCredential; import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.BinaryData; import com.azure.core.util.BinaryData;
@ -46,29 +44,23 @@ public class AzureOcrResource {
@SneakyThrows @SneakyThrows
public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) { public PollerFlux<AnalyzeOperationDetails, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) {
AnalyzeDocumentRequest analyzeRequest = new AnalyzeDocumentRequest().setBase64Source(data.toBytes()); AnalyzeDocumentOptions analyzeDocumentOptions = new AnalyzeDocumentOptions(data.toBytes());
analyzeDocumentOptions.setStringIndexType(StringIndexType.UTF16_CODE_UNIT);
return asyncClient.beginAnalyzeDocument(getModelId(features), analyzeDocumentOptions.setDocumentAnalysisFeatures(buildFeatures(features));
null, analyzeDocumentOptions.setOutputContentFormat(buildContentFormat());
null, return asyncClient.beginAnalyzeDocument(getModelId(features), analyzeDocumentOptions);
StringIndexType.UTF16CODE_UNIT,
buildFeatures(features),
null,
buildContentFormat(),
Collections.emptyList(),
analyzeRequest);
} }
private ContentFormat buildContentFormat() { private DocumentContentFormat buildContentFormat() {
if (Objects.equal(settings.getContentFormat(), "markdown")) { if (Objects.equal(settings.getContentFormat(), "markdown")) {
return ContentFormat.MARKDOWN; return DocumentContentFormat.MARKDOWN;
} }
return ContentFormat.TEXT; return DocumentContentFormat.TEXT;
} }

View File

@ -1,16 +1,24 @@
package com.knecon.fforesight.service.ocr.processor.service; package com.knecon.fforesight.service.ocr.processor.service;
import java.util.Set;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
@Service @Service
public interface IOcrMessageSender { public interface IOcrMessageSender {
void sendUpdate(String fileId, int finishedImages, int totalImages); void sendUpdate(String fileId, int finishedImages, int totalImages, Set<AzureOcrFeature> features);
void sendOCRStarted(String fileId);
void sendOcrFinished(String fileId, int totalImages); void sendOCRStarted(String fileId, Set<AzureOcrFeature> features);
void sendOcrResponse(String dossierId, String fileId);
void sendOcrFinished(String fileId, int totalImages, Set<AzureOcrFeature> features);
void sendOcrResponse(DocumentRequest request);
} }

View File

@ -60,10 +60,10 @@ public class OCRService {
* @param tmpDir working directory for all files * @param tmpDir working directory for all files
* @param documentFile the file to perform ocr on, results are written invisibly * @param documentFile the file to perform ocr on, results are written invisibly
* @param viewerDocumentFile debugging file, results are written visibly in an optional content group * @param viewerDocumentFile debugging file, results are written visibly in an optional content group
* @param analyzeResultFile result file with additional information * @param idpResultFile result file with additional information
*/ */
@Observed(name = "OCRService", contextualName = "run-ocr-on-document") @Observed(name = "OCRService", contextualName = "run-ocr-on-document")
public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) { public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File idpResultFile) {
if (features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) { if (features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) {
removeWatermark(documentFile); removeWatermark(documentFile);
@ -71,10 +71,9 @@ public class OCRService {
removeInvisibleElements(documentFile); removeInvisibleElements(documentFile);
log.info("Starting OCR");
long ocrStart = System.currentTimeMillis(); long ocrStart = System.currentTimeMillis();
Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile, features).getStatistics(); Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, idpResultFile, features).getStatistics();
long ocrEnd = System.currentTimeMillis(); long ocrEnd = System.currentTimeMillis();
log.info("OCR successful, took {}", humanizeDuration(ocrEnd - ocrStart)); log.info("OCR successful, took {}", humanizeDuration(ocrEnd - ocrStart));
@ -126,14 +125,14 @@ public class OCRService {
File viewerDocumentFile, File viewerDocumentFile,
String fileId, String fileId,
String dossierId, String dossierId,
File analyzeResultFile, File idpResultFile,
Set<AzureOcrFeature> features) { Set<AzureOcrFeature> features) {
try (var in = new FileInputStream(documentFile); PDFDoc pdfDoc = new PDFDoc(in)) { try (var in = new FileInputStream(documentFile); PDFDoc pdfDoc = new PDFDoc(in)) {
OCGWatermarkRemovalService.removeWatermarks(pdfDoc); OCGWatermarkRemovalService.removeWatermarks(pdfDoc);
OcrExecutionSupervisor supervisor = new OcrExecutionSupervisor(pdfDoc.getPageCount(), ocrMessageSender, fileId, settings); OcrExecutionSupervisor supervisor = new OcrExecutionSupervisor(pdfDoc.getPageCount(), ocrMessageSender, fileId, settings, features);
supervisor.getStatistics().setStart(); supervisor.getStatistics().setStart();
List<PageBatch> batches = batchFactory.splitIntoBatches(pdfDoc, supervisor, features, runDir); List<PageBatch> batches = batchFactory.splitIntoBatches(pdfDoc, supervisor, features, runDir);
@ -149,7 +148,7 @@ public class OCRService {
} }
if (features.contains(AzureOcrFeature.IDP)) { if (features.contains(AzureOcrFeature.IDP)) {
saveAnalyzeResultFile(analyzeResultFile, ocrResult); saveIdpResultFile(idpResultFile, ocrResult);
} }
supervisor.getStatistics().drawingPdfFinished(); supervisor.getStatistics().drawingPdfFinished();
@ -162,9 +161,9 @@ public class OCRService {
} }
private void saveAnalyzeResultFile(File analyzeResultFile, OcrResult ocrResult) throws IOException { private void saveIdpResultFile(File idpResultFile, OcrResult ocrResult) throws IOException {
try (var out = new FileOutputStream(analyzeResultFile)) { try (var out = new FileOutputStream(idpResultFile)) {
mapper.writeValue(out, ocrResult.idpResult()); mapper.writeValue(out, ocrResult.idpResult());
} }
} }

View File

@ -14,6 +14,7 @@ import java.util.concurrent.CountDownLatch;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings; import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch; import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.Statistics; import com.knecon.fforesight.service.ocr.processor.model.Statistics;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.Getter; import lombok.Getter;
@ -39,12 +40,15 @@ public class OcrExecutionSupervisor {
String fileId; String fileId;
Set<AzureOcrFeature> features;
public OcrExecutionSupervisor(int totalPageCount, IOcrMessageSender ocrMessageSender, String fileId, OcrServiceSettings settings) {
public OcrExecutionSupervisor(int totalPageCount, IOcrMessageSender ocrMessageSender, String fileId, OcrServiceSettings settings, Set<AzureOcrFeature> features) {
this.totalPageCount = totalPageCount; this.totalPageCount = totalPageCount;
this.ocrMessageSender = ocrMessageSender; this.ocrMessageSender = ocrMessageSender;
this.fileId = fileId; this.fileId = fileId;
this.features = features;
this.errorPages = Collections.synchronizedSet(new HashSet<>()); this.errorPages = Collections.synchronizedSet(new HashSet<>());
this.countDownPagesToProcess = new CountDownLatch(totalPageCount); this.countDownPagesToProcess = new CountDownLatch(totalPageCount);
this.statistics = new Statistics(); this.statistics = new Statistics();
@ -86,7 +90,7 @@ public class OcrExecutionSupervisor {
if (!statistics.getBatchStats(pageRange).isUploadFinished()) { if (!statistics.getBatchStats(pageRange).isUploadFinished()) {
log.info("Batch {}: Pages {} is in progress", pageRange.getIndex(), pageRange); log.info("Batch {}: Pages {} is in progress", pageRange.getIndex(), pageRange);
statistics.getBatchStats(pageRange).finishUpload(); statistics.getBatchStats(pageRange).finishUpload();
ocrMessageSender.sendUpdate(fileId, processedPages(), getTotalPageCount()); ocrMessageSender.sendUpdate(fileId, processedPages(), getTotalPageCount(), features);
} else { } else {
log.debug("Batch {}: Pages {} still in progress", pageRange.getIndex(), pageRange); log.debug("Batch {}: Pages {} still in progress", pageRange.getIndex(), pageRange);
} }
@ -97,14 +101,15 @@ public class OcrExecutionSupervisor {
batch.forEach(pageIndex -> countDownPagesToProcess.countDown()); batch.forEach(pageIndex -> countDownPagesToProcess.countDown());
statistics.getBatchStats(batch).finishMappingResult(); statistics.getBatchStats(batch).finishMappingResult();
ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount()); ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount(), features);
log.info("Batch {}: Finished mapping result with pages {}", batch.getIndex(), batch);
} }
public void logPageSkipped(Integer pageIndex) { public void logPageSkipped(Integer pageIndex) {
this.countDownPagesToProcess.countDown(); this.countDownPagesToProcess.countDown();
ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount()); ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount(), features);
log.debug("{}/{}: No images to ocr on page {}", processedPages(), getTotalPageCount(), pageIndex); log.debug("{}/{}: No images to ocr on page {}", processedPages(), getTotalPageCount(), pageIndex);
} }
@ -114,7 +119,7 @@ public class OcrExecutionSupervisor {
this.errorPages.add(batch); this.errorPages.add(batch);
batch.forEach(pageIndex -> this.countDownPagesToProcess.countDown()); batch.forEach(pageIndex -> this.countDownPagesToProcess.countDown());
ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount()); ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount(), features);
log.error("{}/{}: Error occurred in batch {} with pages {}", processedPages(), getTotalPageCount(), batch.getIndex(), batch, e); log.error("{}/{}: Error occurred in batch {} with pages {}", processedPages(), getTotalPageCount(), batch.getIndex(), batch, e);
} }
@ -167,7 +172,7 @@ public class OcrExecutionSupervisor {
requireNoErrors(); requireNoErrors();
log.info("{}/{}: Finished OCR on all pages", getTotalPageCount(), getTotalPageCount()); log.info("{}/{}: Finished OCR on all pages", getTotalPageCount(), getTotalPageCount());
ocrMessageSender.sendOcrFinished(fileId, getTotalPageCount()); ocrMessageSender.sendOcrFinished(fileId, getTotalPageCount(), features);
} }

View File

@ -18,13 +18,13 @@ import java.util.stream.Stream;
import com.azure.ai.documentintelligence.models.AnalyzeResult; import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.ai.documentintelligence.models.BoundingRegion; import com.azure.ai.documentintelligence.models.BoundingRegion;
import com.azure.ai.documentintelligence.models.DocumentFontStyle;
import com.azure.ai.documentintelligence.models.DocumentPage; import com.azure.ai.documentintelligence.models.DocumentPage;
import com.azure.ai.documentintelligence.models.DocumentSpan; import com.azure.ai.documentintelligence.models.DocumentSpan;
import com.azure.ai.documentintelligence.models.DocumentStyle; import com.azure.ai.documentintelligence.models.DocumentStyle;
import com.azure.ai.documentintelligence.models.DocumentTable; import com.azure.ai.documentintelligence.models.DocumentTable;
import com.azure.ai.documentintelligence.models.DocumentTableCell; import com.azure.ai.documentintelligence.models.DocumentTableCell;
import com.azure.ai.documentintelligence.models.DocumentWord; import com.azure.ai.documentintelligence.models.DocumentWord;
import com.azure.ai.documentintelligence.models.FontWeight;
import com.google.common.base.Functions; import com.google.common.base.Functions;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings; import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile; import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
@ -32,10 +32,10 @@ import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation; import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup; import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage; import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector; import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline; import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor; import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.StrokeWidthCalculator; import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.StrokeWidthCalculator;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult; import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider; import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
@ -323,16 +323,13 @@ public class OcrResultPostProcessingPipeline {
return Lookups.empty(); return Lookups.empty();
} }
SpanLookup<DocumentSpan> boldLookup = new SpanLookup<>(analyzeResult.getStyles() // Azure stopped supporting bold text detection in 1.0.0 release
.stream() SpanLookup<DocumentSpan> boldLookup = new SpanLookup<>(Stream.empty(), Function.identity());
.filter(style -> Objects.equals(style.getFontWeight(), FontWeight.BOLD))
.map(DocumentStyle::getSpans)
.flatMap(Collection::stream), Function.identity());
SpanLookup<DocumentSpan> italicLookup = new SpanLookup<>(analyzeResult.getStyles() SpanLookup<DocumentSpan> italicLookup = new SpanLookup<>(analyzeResult.getStyles()
.stream() .stream()
.filter(style -> Objects.equals(style.getFontStyle(), .filter(style -> Objects.equals(style.getFontStyle(),
com.azure.ai.documentintelligence.models.FontStyle.ITALIC)) DocumentFontStyle.ITALIC))
.map(DocumentStyle::getSpans) .map(DocumentStyle::getSpans)
.flatMap(Collection::stream), Functions.identity()); .flatMap(Collection::stream), Functions.identity());

View File

@ -31,7 +31,7 @@ public class BBoxSnuggificationService {
private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // Skips snuggification, if the average remaining word rotation of a word, written from left-to-right is bigger than this private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // Skips snuggification, if the average remaining word rotation of a word, written from left-to-right is bigger than this
public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for word, if the remaining rotation is larger than this angle public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for word, if the remaining rotation is larger than this angle
public static final int MAX_SHRINK_PIXELS = 40; // Number of pixels that are allowed to be removed from the top or bottom of an image public static final int MAX_SHRINK_PIXELS = 40; // Number of pixels that are allowed to be removed from the top or bottom of an image
private static final int MINIMUM_WORD_Pixels = 5; private static final int MINIMUM_WORD_PIXELS = 5; // Number of pixels that are required for snuggification
private enum Operation { private enum Operation {
HORIZONTAL, HORIZONTAL,
@ -48,6 +48,11 @@ public class BBoxSnuggificationService {
return Optional.empty(); return Optional.empty();
} }
if (origin.getContent().equals("-") || origin.getContent().equals(",")) {
// very slim characters should not be snuggified, or the fontsize may be off significantly
return Optional.empty();
}
QuadPoint originTransformed = QuadPoint.fromPolygons(origin.getPolygon()).getTransformed(resultToImageTransform); QuadPoint originTransformed = QuadPoint.fromPolygons(origin.getPolygon()).getTransformed(resultToImageTransform);
double remainingAngle = Math.abs(RotationCorrectionUtility.getRemainingAngle(originTransformed.getAngle())); double remainingAngle = Math.abs(RotationCorrectionUtility.getRemainingAngle(originTransformed.getAngle()));
QuadPoint.Direction direction = originTransformed.getDirection(); QuadPoint.Direction direction = originTransformed.getDirection();
@ -133,7 +138,7 @@ public class BBoxSnuggificationService {
if (start == 0 && end == wordImage.w) { if (start == 0 && end == wordImage.w) {
return Optional.empty(); return Optional.empty();
} }
if (Math.abs(start - end) < MINIMUM_WORD_Pixels) { if (Math.abs(start - end) < MINIMUM_WORD_PIXELS) {
return Optional.empty(); return Optional.empty();
} }
return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (wordImage.w - end), origin.getHeight())); return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (wordImage.w - end), origin.getHeight()));
@ -159,7 +164,7 @@ public class BBoxSnuggificationService {
if (start == 0 && end == wordImage.h) { if (start == 0 && end == wordImage.h) {
return Optional.empty(); return Optional.empty();
} }
if (Math.abs(start - end) < MINIMUM_WORD_Pixels) { if (Math.abs(start - end) < MINIMUM_WORD_PIXELS) {
return Optional.empty(); return Optional.empty();
} }
return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (wordImage.h - end))); return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (wordImage.h - end)));

View File

@ -15,7 +15,6 @@ import java.util.regex.Pattern;
import org.slf4j.MDC; import org.slf4j.MDC;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile; import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
@ -76,13 +75,14 @@ public class GhostScriptOutputHandler extends Thread {
if (line == null) { if (line == null) {
break; break;
} }
switch (type) {
if (type.equals(Type.ERROR)) { case STD_OUT -> {
log.error("{}_{}>{}", processName, type.name(), line); log.debug("Batch {}: {}_{}>{}", batchIdx, processName, type.name(), line);
} else { addProcessedImageToQueue(line);
log.debug("{}_{}>{}", processName, type.name(), line); }
addProcessedImageToQueue(line); case ERROR -> log.error("Batch {}: {}_{}>{}", batchIdx, processName, type.name(), line);
} }
} }
} }
is.close(); is.close();
@ -92,7 +92,7 @@ public class GhostScriptOutputHandler extends Thread {
if (!pagesToProcess.isEmpty()) { if (!pagesToProcess.isEmpty()) {
errorHandler.accept(String.format("Ghostscript finished for batch %d, but pages %s remain unprocessed.", batchIdx, formatPagesToProcess())); errorHandler.accept(String.format("Ghostscript finished for batch %d, but pages %s remain unprocessed.", batchIdx, formatPagesToProcess()));
} else { } else {
log.info("Batch: {} rendered successfully!", batchIdx); log.info("Batch {}: rendered successfully!", batchIdx);
} }
} }

View File

@ -6,6 +6,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer; import java.util.function.Consumer;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@ -15,14 +16,12 @@ import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch; import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults; import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@Slf4j @Slf4j
@Service @Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 72/74 @SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 72/74
public class GhostScriptService { public class GhostScriptService {
@ -34,6 +33,37 @@ public class GhostScriptService {
private Semaphore concurrencySemaphore = new Semaphore(3); private Semaphore concurrencySemaphore = new Semaphore(3);
/**
 * Creates the service and immediately probes for a usable Ghostscript installation.
 *
 * @param ocrServiceSettings settings object stored on this service instance
 */
public GhostScriptService(OcrServiceSettings ocrServiceSettings) {
this.ocrServiceSettings = ocrServiceSettings;
// Fail fast at construction time: the probe rethrows any failure as a RuntimeException.
assertGhostscriptIsInstalled();
}
/**
 * Verifies that Ghostscript can be executed by running {@code gs -v} and logging its output.
 * <p>
 * The probe must finish within one second and exit with code 0. Standard output is logged line by line
 * at info level, anything written to standard error is logged at error level.
 *
 * @throws RuntimeException if the process cannot be started, does not finish in time, exits with a
 *                          non-zero code, or the waiting thread is interrupted
 */
private void assertGhostscriptIsInstalled() {

    Process p = null;
    try {
        // Argument list instead of Runtime.exec(String): no implicit tokenizing, and not deprecated.
        p = new ProcessBuilder("gs", "-v").start();
        try (InputStream stdOut = p.getInputStream(); InputStream errOut = p.getErrorStream()) {
            // The wait must not live inside an `assert`: with assertions disabled (the JVM default)
            // the expression would never be evaluated and the timeout would silently be skipped.
            if (!p.waitFor(1, TimeUnit.SECONDS)) {
                throw new IllegalStateException("'gs -v' did not finish within 1 second");
            }
            String out = new String(stdOut.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
            String error = new String(errOut.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
            if (p.exitValue() != 0) {
                throw new IllegalStateException("'gs -v' exited with code %d: %s".formatted(p.exitValue(), error.strip()));
            }
            // Only claim success after the process has actually finished successfully.
            log.info("Ghostscript is installed.");
            for (String line : out.split("\n")) {
                log.info(line);
            }
            if (!error.isBlank()) {
                log.error(error);
            }
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
        }
        log.error("Ghostscript is not installed!");
        log.error(e.getMessage(), e);
        throw new RuntimeException(e);
    } finally {
        // Do not leave a hanging probe process behind, e.g. after a timeout.
        if (p != null && p.isAlive()) {
            p.destroyForcibly();
        }
    }
}
@SneakyThrows @SneakyThrows
public void startBatchRender(PageBatch batch, ImageProcessingSupervisor supervisor, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) { public void startBatchRender(PageBatch batch, ImageProcessingSupervisor supervisor, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
@ -49,7 +79,7 @@ public class GhostScriptService {
concurrencySemaphore.acquire(); concurrencySemaphore.acquire();
log.info("Batch {}: starting GhostScript rendering with page(s) {}", batch.getIndex(), batch); log.info("Batch {}: starting GhostScript rendering with page(s) {}", batch.getIndex(), batch);
executeProcess(batch.getIndex(), buildCmdArgs(batch, batch.getBatchDoc()), successHandler, errorHandler); executeProcess(batch, buildCmdArgs(batch, batch.getBatchDoc()), successHandler, errorHandler);
} }
@ -76,27 +106,27 @@ public class GhostScriptService {
@SneakyThrows @SneakyThrows
private void executeProcess(int batchIdx, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) { private void executeProcess(PageBatch batch, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
Process p = Runtime.getRuntime().exec(processInfo.cmdArgs()); Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
InputStream stdOut = p.getInputStream(); InputStream stdOut = p.getInputStream();
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batchIdx, stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler); GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batch.getIndex(), stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
InputStream stdError = p.getErrorStream(); InputStream stdError = p.getErrorStream();
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batchIdx, stdError, errorHandler); GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batch.getIndex(), stdError, errorHandler);
stdOutLogger.start(); stdOutLogger.start();
stdErrorLogger.start(); stdErrorLogger.start();
handleFinished(p); handleFinished(p, errorHandler, batch, successHandler);
} }
private void handleFinished(Process p) { private void handleFinished(Process p, Consumer<String> errorHandler, PageBatch batch, Consumer<ImageFile> successHandler) {
Thread finishedThread = new Thread(() -> { Thread finishedThread = new Thread(() -> {
try { try {
p.waitFor(); p.waitFor(2, TimeUnit.MINUTES);
} catch (InterruptedException e) { } catch (InterruptedException e) {
log.error("GhostScript process was interrupted", e); errorHandler.accept("Batch %d: Ghostscript rendering has been terminated after 2 minutes \n %s".formatted(batch.getIndex(), e.getMessage()));
} finally { } finally {
concurrencySemaphore.release(); concurrencySemaphore.release();
} }

View File

@ -87,7 +87,7 @@ public class ImageProcessingService {
LeptUtils.disposePix(processedPix); LeptUtils.disposePix(processedPix);
} }
} catch (Exception e) { } catch (Exception e) {
supervisor.markError(e.getMessage()); supervisor.markError("Page %d could not be processed due to: %s".formatted(unprocessedImage.pageNumber(), e.getMessage()));
} finally { } finally {
supervisor.markPageFinished(processedImage); supervisor.markPageFinished(processedImage);
log.debug("Finished page: {}", processedImage.pageNumber()); log.debug("Finished page: {}", processedImage.pageNumber());

View File

@ -69,6 +69,7 @@ public class ImageProcessingSupervisor {
public void markError(String errorMessage) { public void markError(String errorMessage) {
log.error(errorMessage);
this.errors.add(errorMessage); this.errors.add(errorMessage);
} }

View File

@ -12,8 +12,6 @@ import com.azure.ai.documentintelligence.models.DocumentBarcode;
import com.azure.ai.documentintelligence.models.DocumentFigure; import com.azure.ai.documentintelligence.models.DocumentFigure;
import com.azure.ai.documentintelligence.models.DocumentKeyValuePair; import com.azure.ai.documentintelligence.models.DocumentKeyValuePair;
import com.azure.ai.documentintelligence.models.DocumentLine; import com.azure.ai.documentintelligence.models.DocumentLine;
import com.azure.ai.documentintelligence.models.DocumentList;
import com.azure.ai.documentintelligence.models.DocumentListItem;
import com.azure.ai.documentintelligence.models.DocumentParagraph; import com.azure.ai.documentintelligence.models.DocumentParagraph;
import com.azure.ai.documentintelligence.models.DocumentSection; import com.azure.ai.documentintelligence.models.DocumentSection;
import com.azure.ai.documentintelligence.models.DocumentTable; import com.azure.ai.documentintelligence.models.DocumentTable;
@ -23,8 +21,8 @@ import com.azure.ai.documentintelligence.models.DocumentWord;
import com.azure.ai.documentintelligence.models.ParagraphRole; import com.azure.ai.documentintelligence.models.ParagraphRole;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch; import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup; import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.LineUtils; import com.knecon.fforesight.service.ocr.processor.visualizations.utils.LineUtils;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint; import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.knecon.fforesight.service.viewerdoc.layers.IdpLayerConfig; import com.knecon.fforesight.service.viewerdoc.layers.IdpLayerConfig;
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine; import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
@ -69,14 +67,6 @@ public class IdpLayer extends IdpLayerConfig {
} }
public void addList(DocumentList list, PageBatch pageOffset) {
for (DocumentListItem item : list.getItems()) {
addBoundingRegion(item.getBoundingRegions(), lists, PARAGRAPH_COLOR, pageOffset);
}
}
public void addBarcode(int pageNumber, DocumentBarcode barcode) { public void addBarcode(int pageNumber, DocumentBarcode barcode) {
addPolygon(pageNumber, barcode.getPolygon(), barcodes, IMAGE_COLOR); addPolygon(pageNumber, barcode.getPolygon(), barcodes, IMAGE_COLOR);
@ -85,8 +75,11 @@ public class IdpLayer extends IdpLayerConfig {
public void addKeyValue(DocumentKeyValuePair keyValue, PageBatch pageOffset) { public void addKeyValue(DocumentKeyValuePair keyValue, PageBatch pageOffset) {
if (keyValue.getKey() == null || keyValue.getKey().getContent().isEmpty()) {
return;
}
addBoundingRegion(keyValue.getKey().getBoundingRegions(), keyValuePairs, KEY_COLOR, pageOffset); addBoundingRegion(keyValue.getKey().getBoundingRegions(), keyValuePairs, KEY_COLOR, pageOffset);
if (keyValue.getValue() != null) { if (keyValue.getValue() != null && !keyValue.getValue().getContent().isEmpty()) {
addBoundingRegion(keyValue.getValue().getBoundingRegions(), keyValuePairs, VALUE_COLOR, pageOffset); addBoundingRegion(keyValue.getValue().getBoundingRegions(), keyValuePairs, VALUE_COLOR, pageOffset);
if (keyValue.getKey().getBoundingRegions() if (keyValue.getKey().getBoundingRegions()

View File

@ -3,7 +3,6 @@ package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
import static com.knecon.fforesight.service.ocr.processor.utils.StringCleaningUtility.cleanString; import static com.knecon.fforesight.service.ocr.processor.utils.StringCleaningUtility.cleanString;
import java.awt.geom.AffineTransform; import java.awt.geom.AffineTransform;
import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -23,9 +22,9 @@ import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation; import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector; import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility; import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature; import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.Figure; import com.knecon.fforesight.service.ocr.v1.api.model.Figure;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import com.knecon.fforesight.service.ocr.v1.api.model.KeyValuePair; import com.knecon.fforesight.service.ocr.v1.api.model.KeyValuePair;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint; import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.knecon.fforesight.service.ocr.v1.api.model.Region; import com.knecon.fforesight.service.ocr.v1.api.model.Region;
@ -65,11 +64,14 @@ public class IdpResultFactory {
public AffineTransform getResultToPageTransform(Integer pageNumber) { public AffineTransform getResultToPageTransform(Integer pageNumber) {
AffineTransform transform = resultToPageTransforms.get(pageNumber); AffineTransform transform;
if (rotationCorrection) { if (rotationCorrection) {
PageInformation page = pageInformation.get(pageNumber); PageInformation page = pageInformation.get(pageNumber);
transform.preConcatenate(RotationCorrectionUtility.buildTransform(angles.get(pageNumber), page.width(), page.height(), false)); transform = RotationCorrectionUtility.buildTransform(-angles.get(pageNumber), page.cropBox().getWidth(), page.cropBox().getHeight(), false);
} else {
transform = new AffineTransform();
} }
transform.concatenate(resultToPageTransforms.get(pageNumber));
return transform; return transform;
} }
@ -77,12 +79,18 @@ public class IdpResultFactory {
public void addAnalyzeResult(AnalyzeResult analyzeResult, PageBatch batch) { public void addAnalyzeResult(AnalyzeResult analyzeResult, PageBatch batch) {
DocumentSpanLookup words = new DocumentSpanLookup(analyzeResult); DocumentSpanLookup words = new DocumentSpanLookup(analyzeResult);
analyzeResult.getTables() if (analyzeResult.getTables() != null) {
.forEach(documentTable -> addTable(documentTable, words, batch)); analyzeResult.getTables()
analyzeResult.getKeyValuePairs() .forEach(documentTable -> addTable(documentTable, words, batch));
.forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch)); }
analyzeResult.getFigures() if (analyzeResult.getKeyValuePairs() != null) {
.forEach(documentFigure -> addFigure(documentFigure, batch, words)); analyzeResult.getKeyValuePairs()
.forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch));
}
if (analyzeResult.getFigures() != null) {
analyzeResult.getFigures()
.forEach(documentFigure -> addFigure(documentFigure, batch, words));
}
} }
@ -108,12 +116,12 @@ public class IdpResultFactory {
private void addKeyValuePair(DocumentKeyValuePair documentKeyValuePair, PageBatch batch) { private void addKeyValuePair(DocumentKeyValuePair documentKeyValuePair, PageBatch batch) {
TextRegion key = null; TextRegion key = null;
if (documentKeyValuePair.getKey() != null) { if (documentKeyValuePair.getKey() != null && !documentKeyValuePair.getKey().getContent().isEmpty()) {
Region region = toRegionFromRegions(batch, documentKeyValuePair.getKey().getBoundingRegions()); Region region = toRegionFromRegions(batch, documentKeyValuePair.getKey().getBoundingRegions());
key = new TextRegion(region, cleanString(documentKeyValuePair.getKey().getContent())); key = new TextRegion(region, cleanString(documentKeyValuePair.getKey().getContent()));
} }
TextRegion value = null; TextRegion value = null;
if (documentKeyValuePair.getValue() != null) { if (documentKeyValuePair.getValue() != null && !documentKeyValuePair.getValue().getContent().isEmpty()) {
Region region = toRegionFromRegions(batch, documentKeyValuePair.getValue().getBoundingRegions()); Region region = toRegionFromRegions(batch, documentKeyValuePair.getValue().getBoundingRegions());
value = new TextRegion(region, cleanString(documentKeyValuePair.getValue().getContent())); value = new TextRegion(region, cleanString(documentKeyValuePair.getValue().getContent()));
} }
@ -204,6 +212,9 @@ public class IdpResultFactory {
private Region toRegionFromRegions(int pageNumber, List<BoundingRegion> regions) { private Region toRegionFromRegions(int pageNumber, List<BoundingRegion> regions) {
if (regions.size() == 1) {
return new Region(pageNumber, QuadPoint.fromPolygons(regions.get(0).getPolygon()).getTransformed(getResultToPageTransform(pageNumber)).toData());
}
QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream() QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream()
.map(BoundingRegion::getPolygon) .map(BoundingRegion::getPolygon)
.map(QuadPoint::fromPolygons) .map(QuadPoint::fromPolygons)
@ -224,13 +235,7 @@ public class IdpResultFactory {
throw new AssertionError(); throw new AssertionError();
} }
int pageNumber = batch.getPageNumber(batchPageNumber); int pageNumber = batch.getPageNumber(batchPageNumber);
QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream() return toRegionFromRegions(pageNumber, regions);
.map(BoundingRegion::getPolygon)
.map(QuadPoint::fromPolygons)
.map(qp -> qp.getTransformed(getResultToPageTransform(pageNumber)).getBounds2D())
.collect(new Rectangle2DBBoxCollector()));
return new Region(pageNumber, bbox.toData());
} }
} }

View File

@ -67,10 +67,8 @@ public class LayerFactory {
ocrDebugLayerFactory.addAnalysisResult(results); ocrDebugLayerFactory.addAnalysisResult(results);
} }
if (features.contains(AzureOcrFeature.IDP)) { if (features.contains(AzureOcrFeature.IDP)) {
log.info("Batch {}: Start building IDP stuff", batch.getIndex());
idpLayerFactory.addAnalyzeResult(analyzeResult, batch); idpLayerFactory.addAnalyzeResult(analyzeResult, batch);
idpResultFactory.addAnalyzeResult(analyzeResult, batch); idpResultFactory.addAnalyzeResult(analyzeResult, batch);
log.info("Batch {}: Finished building IDP stuff", batch.getIndex());
} }
this.supervisor.finishMappingResult(batch); this.supervisor.finishMappingResult(batch);

View File

@ -85,12 +85,14 @@ public class RotationCorrectionUtility {
List<String> commands = new LinkedList<>(); List<String> commands = new LinkedList<>();
double scale = getScalingFactor(angle, page); double scale = getScalingFactor(angle, page);
double x = page.getCropBox().getWidth() / 2;
double y = page.getCropBox().getHeight() / 2;
commands.add("q"); commands.add("q");
commands.add("/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName())); commands.add("/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName()));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(page.getPageWidth() / 2, page.getPageHeight() / 2))); commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(x, y)));
commands.add(buildMatrixCommands(AffineTransform.getRotateInstance(Math.toRadians(angle)))); commands.add(buildMatrixCommands(AffineTransform.getRotateInstance(Math.toRadians(angle))));
commands.add(buildMatrixCommands(AffineTransform.getScaleInstance(scale, scale))); commands.add(buildMatrixCommands(AffineTransform.getScaleInstance(scale, scale)));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-page.getPageWidth() / 2, -page.getPageHeight() / 2))); commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-x, -y)));
commands.add("EMC"); commands.add("EMC");
return String.join("\n", commands); return String.join("\n", commands);
} }
@ -197,7 +199,7 @@ public class RotationCorrectionUtility {
public static double getRemainingAngle(double angle, int quadrants) { public static double getRemainingAngle(double angle, int quadrants) {
double referenceAngle = 90 * quadrants; double referenceAngle = 90 * quadrants;
return angle - referenceAngle; return (angle - referenceAngle) % 360;
} }

View File

@ -1,10 +1,13 @@
package com.knecon.fforesight.service.ocr.v1.server.queue; package com.knecon.fforesight.service.ocr.v1.server.queue;
import java.util.Set;
import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.service.IOcrMessageSender; import com.knecon.fforesight.service.ocr.processor.service.IOcrMessageSender;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest; import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration; import com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext; import com.knecon.fforesight.tenantcommons.TenantContext;
@ -22,24 +25,24 @@ public class NoStatusUpdateOcrMessageSender implements IOcrMessageSender {
RabbitTemplate rabbitTemplate; RabbitTemplate rabbitTemplate;
public void sendOcrFinished(String fileId, int totalImages) { public void sendOcrFinished(String fileId, int totalImages, Set<AzureOcrFeature> features) {
} }
public void sendOCRStarted(String fileId) { public void sendOCRStarted(String fileId, Set<AzureOcrFeature> features) {
} }
public void sendUpdate(String fileId, int finishedImages, int totalImages) { public void sendUpdate(String fileId, int finishedImages, int totalImages, Set<AzureOcrFeature> features) {
} }
public void sendOcrResponse(String dossierId, String fileId) { public void sendOcrResponse(DocumentRequest request) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_EXCHANGE, TenantContext.getTenantId(), new DocumentRequest(dossierId, fileId)); rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_EXCHANGE, TenantContext.getTenantId(), request);
} }
} }

View File

@ -5,6 +5,8 @@ import java.io.IOException;
import java.nio.file.Path; import java.nio.file.Path;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit; import java.time.temporal.ChronoUnit;
import java.util.Objects;
import java.util.stream.Collectors;
import org.slf4j.MDC; import org.slf4j.MDC;
import org.springframework.amqp.AmqpRejectAndDontRequeueException; import org.springframework.amqp.AmqpRejectAndDontRequeueException;
@ -34,6 +36,9 @@ import lombok.extern.slf4j.Slf4j;
public class OcrMessageReceiver { public class OcrMessageReceiver {
public static final String OCR_REQUEST_LISTENER_ID = "ocr-request-listener"; public static final String OCR_REQUEST_LISTENER_ID = "ocr-request-listener";
public static final String IDP_RESULT_FILE_NAME = "idpResult.json";
public static final String VIEWER_DOCUMENT_FILE_NAME = "viewerDocument.pdf";
public static final String DOCUMENT_FILE_NAME = "document.pdf";
FileStorageService fileStorageService; FileStorageService fileStorageService;
ObjectMapper objectMapper; ObjectMapper objectMapper;
@ -56,21 +61,21 @@ public class OcrMessageReceiver {
try { try {
MDC.put("fileId", fileId); MDC.put("fileId", fileId);
log.info("--------------------------------------------------------------------------"); log.info("--------------------------------- Starting OCR ---------------------------------");
log.info("Features: {}", request.getFeatures().stream().map(Objects::toString).collect(Collectors.joining(", ")));
ocrMessageSender.sendOCRStarted(fileId, request.getFeatures());
ocrMessageSender.sendOCRStarted(fileId); File documentFile = runDir.resolve(DOCUMENT_FILE_NAME).toFile();
File viewerDocumentFile = runDir.resolve(VIEWER_DOCUMENT_FILE_NAME).toFile();
File documentFile = runDir.resolve("document.pdf").toFile(); File idpResultFile = runDir.resolve(IDP_RESULT_FILE_NAME).toFile();
File viewerDocumentFile = runDir.resolve("viewerDocument.pdf").toFile();
File analyzeResultFile = runDir.resolve("azureAnalysisResult.json").toFile();
fileStorageService.downloadFiles(request, documentFile); fileStorageService.downloadFiles(request, documentFile);
ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), runDir, documentFile, viewerDocumentFile, analyzeResultFile); ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), runDir, documentFile, viewerDocumentFile, idpResultFile);
fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, analyzeResultFile); fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, idpResultFile);
ocrMessageSender.sendOcrResponse(dossierId, fileId); ocrMessageSender.sendOcrResponse(request);
} catch (Exception e) { } catch (Exception e) {
log.warn("An exception occurred in ocr file stage: {}", e.getMessage()); log.warn("An exception occurred in ocr file stage: {}", e.getMessage());
in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_HEADER, e.getMessage()); in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_HEADER, e.getMessage());
@ -78,6 +83,7 @@ public class OcrMessageReceiver {
throw new RuntimeException(e); throw new RuntimeException(e);
} finally { } finally {
log.info("Done"); log.info("Done");
log.info("--------------------------------- Done ---------------------------------");
MDC.remove("fileId"); MDC.remove("fileId");
FileSystemUtils.deleteRecursively(runDir); FileSystemUtils.deleteRecursively(runDir);
} }

View File

@ -1,10 +1,13 @@
package com.knecon.fforesight.service.ocr.v1.server.queue; package com.knecon.fforesight.service.ocr.v1.server.queue;
import java.util.Set;
import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.service.IOcrMessageSender; import com.knecon.fforesight.service.ocr.processor.service.IOcrMessageSender;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest; import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse; import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration; import com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration;
@ -25,35 +28,46 @@ public class OcrMessageSender implements IOcrMessageSender {
RabbitTemplate rabbitTemplate; RabbitTemplate rabbitTemplate;
public void sendOcrFinished(String fileId, int totalImages) { public void sendOcrFinished(String fileId, int totalImages, Set<AzureOcrFeature> features) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE, rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE,
TenantContext.getTenantId(), TenantContext.getTenantId(),
OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(totalImages).numberOfOCRedPages(totalImages).ocrFinished(true).build()); OCRStatusUpdateResponse.builder()
.fileId(fileId)
.numberOfPagesToOCR(totalImages)
.numberOfOCRedPages(totalImages)
.ocrFinished(true)
.features(features)
.build());
} }
public void sendOCRStarted(String fileId) { public void sendOCRStarted(String fileId, Set<AzureOcrFeature> features) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE, rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE,
TenantContext.getTenantId(), TenantContext.getTenantId(),
OCRStatusUpdateResponse.builder().fileId(fileId).ocrStarted(true).build()); OCRStatusUpdateResponse.builder().fileId(fileId).features(features).ocrStarted(true).build());
} }
public void sendUpdate(String fileId, int finishedImages, int totalImages) { public void sendUpdate(String fileId, int finishedImages, int totalImages, Set<AzureOcrFeature> features) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE, rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE,
TenantContext.getTenantId(), TenantContext.getTenantId(),
OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(totalImages).numberOfOCRedPages(finishedImages).build()); OCRStatusUpdateResponse.builder()
.fileId(fileId)
.features(features)
.numberOfPagesToOCR(totalImages)
.numberOfOCRedPages(finishedImages)
.build());
} }
public void sendOcrResponse(String dossierId, String fileId) { public void sendOcrResponse(DocumentRequest request) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_EXCHANGE, TenantContext.getTenantId(), new DocumentRequest(dossierId, fileId)); rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_EXCHANGE, TenantContext.getTenantId(), request);
} }
} }

View File

@ -1,5 +1,5 @@
# you can list packages # you can list packages
ghostscript=9.55.0~dfsg1-0ubuntu5.9 ghostscript=9.55.0~dfsg1-0ubuntu5.10
pkg-config pkg-config
zip zip
unzip unzip

View File

@ -0,0 +1,50 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import static org.junit.jupiter.api.Assertions.*;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link QuadPoint}: orientation flags, containment of points and rectangles,
 * direction and center.
 */
class QuadPointTest {

    /**
     * Builds a quad from the four corners of the unit square and checks that it is reported as
     * horizontal (and not vertical) and that it contains each of its corners, the point
     * (0.5, 0.5) and a 0.1 x 0.1 rectangle anchored at (0.5, 0.5).
     */
    @Test
    public void testContains() {

        Point2D.Double[] corners = {
                new Point2D.Double(0, 0),
                new Point2D.Double(0, 1),
                new Point2D.Double(1, 1),
                new Point2D.Double(1, 0)
        };
        var unitSquare = new QuadPoint(corners[0], corners[1], corners[2], corners[3]);

        assertTrue(unitSquare.isHorizontal());
        assertFalse(unitSquare.isVertical());

        // Corners are checked in construction order, one assertion per corner.
        for (Point2D.Double corner : corners) {
            assertTrue(unitSquare.contains(corner));
        }

        var midpoint = new Point2D.Double(0.5, 0.5);
        assertTrue(unitSquare.contains(midpoint));

        var innerBox = new Rectangle2D.Double(0.5, 0.5, 0.1, 0.1);
        assertTrue(unitSquare.contains(innerBox));
    }


    /**
     * Builds a quad from the points (0,0), (1,1), (2,1), (1,0) and checks its orientation flags,
     * that its direction is {@code RIGHT} and that its center equals (1, 0.5).
     */
    @Test
    public void testCenter() {

        var first = new Point2D.Double(0, 0);
        var second = new Point2D.Double(1, 1);
        var third = new Point2D.Double(2, 1);
        var fourth = new Point2D.Double(1, 0);
        var slantedQuad = new QuadPoint(first, second, third, fourth);

        assertTrue(slantedQuad.isHorizontal());
        assertFalse(slantedQuad.isVertical());
        assertEquals(QuadPoint.Direction.RIGHT, slantedQuad.getDirection());

        var expectedCenter = new Point2D.Double(1, 0.5);
        assertEquals(expectedCenter, slantedQuad.getCenter());
    }

}

View File

@ -1,6 +1,9 @@
package com.knecon.fforesight.service.ocr.v1.server; package com.knecon.fforesight.service.ocr.v1.server;
import static com.iqser.red.pdftronlogic.commons.PdfTextExtraction.extractAllTextFromDocument; import static com.iqser.red.pdftronlogic.commons.PdfTextExtraction.extractAllTextFromDocument;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.DOCUMENT_FILE_NAME;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.IDP_RESULT_FILE_NAME;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.VIEWER_DOCUMENT_FILE_NAME;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
@ -55,7 +58,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
// Runs testOCR against a single PDF given by a hard-coded path.
// NOTE(review): the path is absolute and lives under a user's home directory, so it is presumably
// only resolvable on the author's machine - confirm this test is excluded from CI or point it at
// a checked-in test resource.
@SneakyThrows @SneakyThrows
public void testOcrWithFile() { public void testOcrWithFile() {
testOCR("/home/kschuettler/Dokumente/402Study.pdf"); testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/TestSet/VV-331340-first100.pdf");
} }
@ -108,9 +111,9 @@ public class OcrServiceIntegrationTest extends AbstractTest {
assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs(); assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs();
var documentFile = tmpDir.resolve(Path.of("document.pdf")); var documentFile = tmpDir.resolve(Path.of(DOCUMENT_FILE_NAME));
var viewerDocumentFile = tmpDir.resolve(Path.of("viewerDocument.pdf")); var viewerDocumentFile = tmpDir.resolve(Path.of(VIEWER_DOCUMENT_FILE_NAME));
var analyzeResultFile = tmpDir.resolve(Path.of("azureAnalysisResult.json")); var analyzeResultFile = tmpDir.resolve(Path.of(IDP_RESULT_FILE_NAME));
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING); Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING); Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);

View File

@ -15,6 +15,7 @@
<exclude name="NullAssignment"/> <exclude name="NullAssignment"/>
<exclude name="AssignmentInOperand"/> <exclude name="AssignmentInOperand"/>
<exclude name="BeanMembersShouldSerialize"/> <exclude name="BeanMembersShouldSerialize"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
</rule> </rule>
</ruleset> </ruleset>

View File

@ -17,6 +17,7 @@
<exclude name="AssignmentInOperand"/> <exclude name="AssignmentInOperand"/>
<exclude name="TestClassWithoutTestCases"/> <exclude name="TestClassWithoutTestCases"/>
<exclude name="BeanMembersShouldSerialize"/> <exclude name="BeanMembersShouldSerialize"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
</rule> </rule>
</ruleset> </ruleset>