Compare commits

...

5 Commits
main ... 0.78.4

Author SHA1 Message Date
Dominique Eifländer
07da43f2d9 hotfix: revert layoutparsingResponseQueue changes that are not in persistence-service; this should be done in the migration to 4.0.0 2024-04-24 15:24:19 +02:00
Dominique Eifländer
df0bbc92c7 RED-8932 Fixed not merged headline with identifier 2024-04-24 11:38:26 +02:00
Kilian Schüttler
0497d764ec Merge branch 'hotfix' into 'release/0.78.x'
hotfix: remove DLQ for layoutparsing finished queue

See merge request fforesight/layout-parser!129
2024-04-08 15:39:04 +02:00
Kilian Schuettler
1362e4fbb2 hotfix: remove DLQ for layoutparsing finished queue 2024-04-08 15:31:35 +02:00
Dominique Eifländer
665ad40b0b RED-8627: Fixed scrambled text after sorting 2024-03-19 14:46:04 +01:00
5 changed files with 22 additions and 21 deletions

View File

@ -16,6 +16,8 @@ deploy:
reports:
dotenv: version.env
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_BRANCH =~ /^release/
- if: $CI_COMMIT_TAG
pmd:
allow_failure: true

View File

@ -196,6 +196,12 @@ public class TextPositionSequence implements CharSequence {
}
/**
 * Returns the height of the first text position of this sequence, exactly as reported by
 * its {@code getHeightDir()} accessor.
 *
 * <p>Only the element at index 0 of {@code textPositions} is consulted; the other positions
 * of the sequence do not influence the result.
 *
 * <p>NOTE(review): an empty {@code textPositions} would make {@code get(0)} fail; confirm
 * that callers never reach this method with an empty sequence.
 *
 * <p>NOTE(review): the getter declared right after this one carries {@code @JsonIgnore} and
 * {@code @JsonAttribute(ignore = true)}; confirm whether this derived value should be
 * excluded from serialization as well.
 *
 * @return the {@code getHeightDir()} value of the first text position
 */
public float getTextHeightNoPadding() {
return textPositions.get(0).getHeightDir();
}
@JsonIgnore
@JsonAttribute(ignore = true)
public float getTextHeight() {
@ -234,6 +240,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public String getFontStyle() {
if (textPositions.get(0).getFontName() == null) {
return "standard";
}

View File

@ -61,8 +61,8 @@ public class DocuMineBlockificationService {
boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj();
boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
boolean splitByOtherFontAndOtherY = prev != null && prev.getMaxYDirAdj() != word.getMaxYDirAdj() && (word.getFontStyle().contains("bold") && !prev.getFontStyle()
.contains("bold") || prev.getFontStyle().contains("bold") && !word.getFontStyle().contains("bold"));
boolean splitByOtherFontAndOtherY = prev != null && Math.abs(prev.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight() * 0.2 && (word.getFontStyle()
.contains("bold") && !prev.getFontStyle().contains("bold") || prev.getFontStyle().contains("bold") && !word.getFontStyle().contains("bold"));
Matcher matcher = pattern.matcher(chunkWords.stream().collect(Collectors.joining(" ")).toString());
boolean startsOnSameX = Math.abs(minX - word.getMinXDirAdj()) < 5 && matcher.matches();

View File

@ -28,15 +28,13 @@ import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPosit
*
* @author Ben Litchfield
*/
public class TextPositionSequenceComparator implements Comparator<TextPositionSequence>
{
public class TextPositionSequenceComparator implements Comparator<TextPositionSequence> {
@Override
public int compare(TextPositionSequence pos1, TextPositionSequence pos2)
{
public int compare(TextPositionSequence pos1, TextPositionSequence pos2) {
// only compare text that is in the same direction
int cmp1 = Float.compare(pos1.getDir().getDegrees(), pos2.getDir().getDegrees());
if (cmp1 != 0)
{
if (cmp1 != 0) {
return cmp1;
}
@ -48,25 +46,19 @@ public class TextPositionSequenceComparator implements Comparator<TextPositionSe
float pos2YBottom = pos2.getMaxYDirAdj();
// note that the coordinates have been adjusted so 0,0 is in upper left
float pos1YTop = pos1YBottom - pos1.getTextHeight();
float pos2YTop = pos2YBottom - pos2.getTextHeight();
float pos1YTop = pos1YBottom - pos1.getTextHeightNoPadding();
float pos2YTop = pos2YBottom - pos2.getTextHeightNoPadding();
float yDifference = Math.abs(pos1YBottom - pos2YBottom);
// we will do a simple tolerance comparison
if (yDifference < .1 ||
pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom ||
pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom)
{
if (yDifference < .1 || pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) {
return Float.compare(x1, x2);
}
else if (pos1YBottom < pos2YBottom)
{
} else if (pos1YBottom < pos2YBottom) {
return -1;
}
else
{
} else {
return 1;
}
}
}