RED-8627: Fixed scrambled text after sorting

This commit is contained in:
Dominique Eifländer 2024-03-19 10:58:36 +01:00
parent ac850c2626
commit 8e7e588d26
4 changed files with 18 additions and 20 deletions

View File

@ -208,6 +208,12 @@ public class TextPositionSequence implements CharSequence {
} }
/**
 * Returns the directional height of the first text position in this sequence,
 * without the {@code HEIGHT_PADDING} that {@link #getTextHeight()} adds on top.
 *
 * <p>NOTE(review): reads element 0 unconditionally, so this presumably expects
 * the sequence to hold at least one text position; confirm against callers.
 *
 * @return the unpadded height reported by the first text position
 */
public float getTextHeightNoPadding() {
    var firstPosition = textPositions.get(0);
    return firstPosition.getHeightDir();
}
public float getTextHeight() { public float getTextHeight() {
return textPositions.get(0).getHeightDir() + HEIGHT_PADDING; return textPositions.get(0).getHeightDir() + HEIGHT_PADDING;

View File

@ -28,15 +28,13 @@ import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPosit
* *
* @author Ben Litchfield * @author Ben Litchfield
*/ */
public class TextPositionSequenceComparator implements Comparator<TextPositionSequence> public class TextPositionSequenceComparator implements Comparator<TextPositionSequence> {
{
@Override @Override
public int compare(TextPositionSequence pos1, TextPositionSequence pos2) public int compare(TextPositionSequence pos1, TextPositionSequence pos2) {
{
// only compare text that is in the same direction // only compare text that is in the same direction
int cmp1 = Float.compare(pos1.getDir().getDegrees(), pos2.getDir().getDegrees()); int cmp1 = Float.compare(pos1.getDir().getDegrees(), pos2.getDir().getDegrees());
if (cmp1 != 0) if (cmp1 != 0) {
{
return cmp1; return cmp1;
} }
@ -48,25 +46,19 @@ public class TextPositionSequenceComparator implements Comparator<TextPositionSe
float pos2YBottom = pos2.getMaxYDirAdj(); float pos2YBottom = pos2.getMaxYDirAdj();
// note that the coordinates have been adjusted so 0,0 is in upper left // note that the coordinates have been adjusted so 0,0 is in upper left
float pos1YTop = pos1YBottom - pos1.getTextHeight(); float pos1YTop = pos1YBottom - pos1.getTextHeightNoPadding();
float pos2YTop = pos2YBottom - pos2.getTextHeight(); float pos2YTop = pos2YBottom - pos2.getTextHeightNoPadding();
float yDifference = Math.abs(pos1YBottom - pos2YBottom); float yDifference = Math.abs(pos1YBottom - pos2YBottom);
// we will do a simple tolerance comparison // we will do a simple tolerance comparison
if (yDifference < .1 || if (yDifference < .1 || pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) {
pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom ||
pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom)
{
return Float.compare(x1, x2); return Float.compare(x1, x2);
} } else if (pos1YBottom < pos2YBottom) {
else if (pos1YBottom < pos2YBottom)
{
return -1; return -1;
} } else {
else
{
return 1; return 1;
} }
} }
} }

View File

@ -27,7 +27,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
@SneakyThrows @SneakyThrows
public void testViewerDocument() { public void testViewerDocument() {
String fileName = "files/100 Trinexapac-ethyl_RAR_20_Volume_3CP_B-9_ 2018-01-10.pdf"; String fileName = "files/new/ScrambledTextAfterSorting.pdf";
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
var documentFile = new ClassPathResource(fileName).getFile(); var documentFile = new ClassPathResource(fileName).getFile();
@ -35,7 +35,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
Document document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER); Document document = buildGraph(fileName, LayoutParsingType.DOCUMINE);
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true); layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000); System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
} }