Enable snuggification again, but with a minimum size
This commit is contained in:
parent
e8483a8352
commit
8bbc33e01b
@ -18,7 +18,7 @@ public class OcrServiceSettings {
|
||||
|
||||
boolean debug; // writes the ocr layer visibly to the viewer doc pdf
|
||||
boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
|
||||
boolean snuggify; // Enables bold detection using ghostscript and leptonica
|
||||
boolean snuggify = true;
|
||||
String contentFormat; // Either markdown or text. Note: for whatever reason, key-values are not written by Azure when markdown is enabled.
|
||||
|
||||
}
|
||||
|
||||
@ -30,6 +30,8 @@ public class BBoxSnuggificationService {
|
||||
public static final int PIXEL_COUNT_THRESHOLD = 2; // minimum active pixel count per row for shrinking to stop
|
||||
private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // Skips snuggification if the average remaining rotation of words written from left to right is larger than this angle
|
||||
public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for word, if the remaining rotation is larger than this angle
|
||||
public static final int MAX_SHRINK_PIXELS = 40; // Maximum number of pixels that may be removed from any one side (left, right, top or bottom) of a word image
|
||||
private static final int MINIMUM_WORD_Pixels = 5;
|
||||
|
||||
private enum Operation {
|
||||
HORIZONTAL,
|
||||
@ -116,13 +118,13 @@ public class BBoxSnuggificationService {
|
||||
Numa colCounts = Leptonica1.pixCountPixelsByColumn(wordImage);
|
||||
int start = 0;
|
||||
int end = wordImage.w - PIXEL_COUNT_THRESHOLD;
|
||||
for (int i = start; i < Math.min(wordImage.w, 25); i++) {
|
||||
for (int i = start; i < Math.min(wordImage.w, MAX_SHRINK_PIXELS); i++) {
|
||||
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
|
||||
start = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = end; i > Math.max(0, wordImage.w - 25); i--) {
|
||||
for (int i = end; i > Math.max(0, wordImage.w - MAX_SHRINK_PIXELS); i--) {
|
||||
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
|
||||
end = i;
|
||||
break;
|
||||
@ -131,7 +133,9 @@ public class BBoxSnuggificationService {
|
||||
if (start == 0 && end == wordImage.w) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
if (Math.abs(start - end) < MINIMUM_WORD_Pixels) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (wordImage.w - end), origin.getHeight()));
|
||||
}
|
||||
|
||||
@ -140,13 +144,13 @@ public class BBoxSnuggificationService {
|
||||
|
||||
int start = 0;
|
||||
int end = wordImage.h - 1;
|
||||
for (int i = start; i < Math.min(wordImage.h, 25); i++) {
|
||||
for (int i = start; i < Math.min(wordImage.h, MAX_SHRINK_PIXELS); i++) {
|
||||
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
|
||||
start = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = end; i > Math.max(0, wordImage.h - 25); i--) {
|
||||
for (int i = end; i > Math.max(0, wordImage.h - MAX_SHRINK_PIXELS); i--) {
|
||||
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
|
||||
end = i;
|
||||
break;
|
||||
@ -155,6 +159,9 @@ public class BBoxSnuggificationService {
|
||||
if (start == 0 && end == wordImage.h) {
|
||||
return Optional.empty();
|
||||
}
|
||||
if (Math.abs(start - end) < MINIMUM_WORD_Pixels) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (wordImage.h - end)));
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user