diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java index 743096c..de03144 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java @@ -208,6 +208,12 @@ public class TextPositionSequence implements CharSequence { } + public float getTextHeightNoPadding() { + + return textPositions.get(0).getHeightDir(); + } + + public float getTextHeight() { return textPositions.get(0).getHeightDir() + HEIGHT_PADDING; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionSequenceComparator.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionSequenceComparator.java index 40dce07..b002dbc 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionSequenceComparator.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionSequenceComparator.java @@ -28,15 +28,13 @@ import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPosit * * @author Ben Litchfield */ -public class TextPositionSequenceComparator implements Comparator -{ +public class TextPositionSequenceComparator implements Comparator { + @Override - public int compare(TextPositionSequence pos1, TextPositionSequence pos2) - { + public int compare(TextPositionSequence pos1, TextPositionSequence pos2) { // only compare text that is in the same direction int cmp1 = Float.compare(pos1.getDir().getDegrees(), pos2.getDir().getDegrees()); - if (cmp1 != 0) - { + if (cmp1 != 0) { return cmp1; } @@ -48,25 +46,19 @@ public class TextPositionSequenceComparator implements Comparator= pos1YTop && pos2YBottom <= pos1YBottom || - pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) - { + if (yDifference < .1 || pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) { return Float.compare(x1, x2); - } - else if (pos1YBottom < pos2YBottom) - { + } else if (pos1YBottom < pos2YBottom) { return -1; - } - else - { + } else { return 1; } } + } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 197780e..580961e 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -27,7 +27,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { @SneakyThrows public void testViewerDocument() { - String fileName = "files/100 Trinexapac-ethyl_RAR_20_Volume_3CP_B-9_ 2018-01-10.pdf"; + String fileName = "files/new/ScrambledTextAfterSorting.pdf"; String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; var documentFile = new ClassPathResource(fileName).getFile(); @@ -35,7 +35,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); long start = System.currentTimeMillis(); - Document document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER); + Document document = buildGraph(fileName, LayoutParsingType.DOCUMINE); layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true); System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/ScrambledTextAfterSorting.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/ScrambledTextAfterSorting.pdf new file mode 100644 index 0000000..fbf9312 Binary files /dev/null and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/ScrambledTextAfterSorting.pdf differ