diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 30fe901..abab9c3 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -263,6 +263,7 @@ public class LayoutParsingPipeline { boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270); PDRectangle cropbox = pdPage.getCropBox(); + classificationDocument.getVisualizations().addRulingVisualization(stripper.getRulings(), pageNumber); CleanRulings cleanRulings = rulingCleaningService.deduplicateAndStraightenRulings(pdfTableCells.get(pageNumber), stripper.getRulings()); List emptyTableCells = TableExtractionService.findCells(cleanRulings.getHorizontals(), cleanRulings.getVerticals(), PageInformation.fromPDPage(pageNumber, pdPage)); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Character.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Character.java index d53b2d3..772f1b2 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Character.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Character.java @@ -27,8 +27,8 @@ public class Character { public Character(RedTextPosition chunk) { - this.x = chunk.getDirectionAdjustedPosition().getCenterX(); - this.y = chunk.getDirectionAdjustedPosition().getCenterY(); + this.x = chunk.getBBoxDirAdj().getCenterX(); + this.y = chunk.getBBoxDirAdj().getCenterY(); this.textPosition = chunk; } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java index ac215b6..710d7eb 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java @@ -21,7 +21,8 @@ import lombok.SneakyThrows; public class RedTextPosition extends BoundingBox { public final static int HEIGHT_PADDING = 2; - private Rectangle2D.Float directionAdjustedPosition; // adjusted to text rotation + + private Rectangle2D.Float bBoxDirAdj; // adjusted to text rotation @JsonIgnore private int rotation; @@ -72,7 +73,7 @@ public class RedTextPosition extends BoundingBox { textPosition.getYDirAdj() - textHeight, textPosition.getWidthDirAdj(), textHeight + HEIGHT_PADDING); - pos.setDirectionAdjustedPosition(dirAdjPosition); + pos.setBBoxDirAdj(dirAdjPosition); AffineTransform affineTransform = getRotationMatrix(TextDirection.fromDegrees(textPosition.getDir()), textPosition.getPageWidth(), textPosition.getPageHeight()); Rectangle2D initialUserSpacePositionRect = affineTransform.createTransformedShape(dirAdjPosition).getBounds2D(); @@ -83,6 +84,7 @@ public class RedTextPosition extends BoundingBox { } + private static AffineTransform getRotationMatrix(TextDirection textDirection, float pageWidth, float pageHeight) { AffineTransform transform = new AffineTransform(); @@ -105,28 +107,28 @@ public class RedTextPosition extends BoundingBox { @JsonIgnore public float getXDirAdj() { - return this.directionAdjustedPosition.x; + return this.bBoxDirAdj.x; } @JsonIgnore public float getYDirAdj() { - return this.directionAdjustedPosition.y; + return this.bBoxDirAdj.y; } @JsonIgnore public float getWidthDirAdj() { - return this.directionAdjustedPosition.width; + return this.bBoxDirAdj.width; } @JsonIgnore public float getHeightDir() { - return this.directionAdjustedPosition.height; + return this.bBoxDirAdj.height; } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java index 2958fb7..15fb48b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java @@ -1,5 +1,6 @@ package com.knecon.fforesight.service.layoutparser.processor.model.text; +import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.List; import java.util.Locale; @@ -8,6 +9,7 @@ import java.util.stream.Collectors; import org.apache.pdfbox.text.TextPosition; import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox; +import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; import lombok.AllArgsConstructor; import lombok.Builder; @@ -32,6 +34,7 @@ public class TextPositionSequence extends BoundingBox implements CharSequence { @EqualsAndHashCode.Include private List textPositions = new ArrayList<>(); + private Rectangle2D bBoxDirAdj; @EqualsAndHashCode.Include private TextDirection dir; private int rotation; @@ -53,6 +56,15 @@ public class TextPositionSequence extends BoundingBox implements CharSequence { this.pageHeight = textPositions.get(0).getPageHeight(); this.pageWidth = textPositions.get(0).getPageWidth(); this.isParagraphStart = isParagraphStart; + calculateBBox(); + } + + + private void calculateBBox() { + + this.bBoxDirAdj = textPositions.stream() + .map(RedTextPosition::getBBoxDirAdj) + .collect(RectangleTransformations.collectBBox()); setToBBoxOfComponents(getTextPositions()); } @@ -65,7 +77,7 @@ public class TextPositionSequence extends BoundingBox implements CharSequence { this.rotation = textPositions.get(0).getRotation(); this.pageHeight = textPositions.get(0).getPageHeight(); this.pageWidth = textPositions.get(0).getPageWidth(); - setToBBoxOfComponents(getTextPositions()); + calculateBBox(); } @@ -133,7 +145,7 @@ public class TextPositionSequence extends BoundingBox implements CharSequence { this.rotation = textPositionSequence.getRotation(); this.pageHeight = textPositionSequence.getPageHeight(); this.pageWidth = textPositionSequence.getPageWidth(); - setToBBoxOfComponents(getTextPositions()); + calculateBBox(); ; } @@ -145,7 +157,7 @@ public class TextPositionSequence extends BoundingBox implements CharSequence { this.rotation = textPositions.get(0).getRotation(); this.pageHeight = textPositions.get(0).getPageHeight(); this.pageWidth = textPositions.get(0).getPageWidth(); - setToBBoxOfComponents(getTextPositions()); + calculateBBox(); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/ImageServiceResponseAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/ImageServiceResponseAdapter.java index 4517029..968bfbd 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/ImageServiceResponseAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/ImageServiceResponseAdapter.java @@ -56,7 +56,7 @@ public class ImageServiceResponseAdapter { classificationPage.getImages().forEach(image -> { if (image.getImageType().equals(ImageType.OTHER)) { for (AbstractPageBlock textblock : classificationPage.getTextBlocks()) { - if (image.getPosition().contains(textblock.getBBox())) { + if (image.getPosition().contains(textblock.getBBoxInitialUserSpace())) { image.setImageType(ImageType.OCR); return; } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java index e89ea3a..ffe07f8 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java @@ -124,9 +124,9 @@ public class RulingCleaningService { } if (ruling.isHorizontal()) { - return new Rectangle2D.Double(x - THRESHOLD_Y_HORIZONTAL, y - THRESHOLD_X_HORIZONTAL, w + 2 * THRESHOLD_X_HORIZONTAL, h + 2 * THRESHOLD_Y_HORIZONTAL); + return new Rectangle2D.Double(x - THRESHOLD_X_HORIZONTAL, y - THRESHOLD_Y_HORIZONTAL, w + 2 * THRESHOLD_X_HORIZONTAL, h + 2 * THRESHOLD_Y_HORIZONTAL); } else { - return new Rectangle2D.Double(x - THRESHOLD_Y_VERTICAL, y - THRESHOLD_X_VERTICAL, w + 2 * THRESHOLD_X_VERTICAL, h + 2 * THRESHOLD_Y_VERTICAL); + return new Rectangle2D.Double(x - THRESHOLD_X_VERTICAL, y - THRESHOLD_Y_VERTICAL, w + 2 * THRESHOLD_X_VERTICAL, h + 2 * THRESHOLD_Y_VERTICAL); } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java index 01da019..b28a80b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java @@ -142,14 +142,14 @@ public class TableExtractionService { public static List findCells(List horizontalRulingLines, List verticalRulingLines, PageInformation pageInformation) { AffineTransform affineTransform = CoordinateTransforms.calculateInitialUserSpaceCoordsToImageCoords(pageInformation, 1); - + /* switch (pageInformation.rotationDegrees()) { case 90 -> affineTransform.translate(RedTextPosition.HEIGHT_PADDING, 0); //although this is wrong, our text coordinates are wrong as well case 180 -> affineTransform.translate(0, RedTextPosition.HEIGHT_PADDING); case 270 -> affineTransform.translate(-RedTextPosition.HEIGHT_PADDING, 0); default -> affineTransform.translate(0, -RedTextPosition.HEIGHT_PADDING); } - + */ return RectangularIntersectionFinder.find(horizontalRulingLines, verticalRulingLines) .stream() .map(rect -> new Cell(rect, affineTransform)) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java index 14c7b17..7fc2d40 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java @@ -31,14 +31,14 @@ public class TextRulingsClassifier { private static void handleVerticalText(CleanRulings cleanRulings, TextPositionSequence word) { - float lowerY = (float) (word.getBBox().getMinY() + TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); - float upperY = (float) (word.getBBox().getMaxY() - TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); + float lowerY = (float) (word.getBBoxInitialUserSpace().getMinY() + TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); + float upperY = (float) (word.getBBoxInitialUserSpace().getMaxY() - TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); - float strikethroughCenterX = (float) word.getBBox().getCenterX(); - float strikethroughBoxHeight = (float) ((word.getHeight() * STRIKETHROUGH_ZONE) / 2); + float strikethroughCenterX = (float) word.getBBoxInitialUserSpace().getCenterX(); + float strikethroughBoxHeight = (float) ((word.getBBoxDirAdj().getHeight() * STRIKETHROUGH_ZONE) / 2); - float underlineCenterX = (float) (word.getDir().equals(TextDirection.QUARTER_CIRCLE) ? word.getBBox().getMaxX() : word.getBBox().getMinX()); - float underlineBoxHeight = (float) ((word.getHeight() * UNDERLINE_ZONE) / 2); + float underlineCenterX = (float) (word.getDir().equals(TextDirection.QUARTER_CIRCLE) ? word.getBBoxInitialUserSpace().getMaxX() : word.getBBoxInitialUserSpace().getMinX()); + float underlineBoxHeight = (float) ((word.getBBoxDirAdj().getHeight() * UNDERLINE_ZONE) / 2); float leftX = Math.min(underlineCenterX - underlineBoxHeight, strikethroughCenterX - strikethroughBoxHeight); float rightX = Math.max(underlineCenterX + underlineBoxHeight, strikethroughCenterX + strikethroughBoxHeight); @@ -65,14 +65,14 @@ public class TextRulingsClassifier { private static void handleHorizontalText(CleanRulings cleanRulings, TextPositionSequence word) { - float leftX = (float) (word.getBBox().getMinX() + TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); - float rightX = (float) (word.getBBox().getMaxX() - TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); + float leftX = (float) (word.getBBoxInitialUserSpace().getMinX() + TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); + float rightX = (float) (word.getBBoxInitialUserSpace().getMaxX() - TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth()); - float strikethroughCenterY = (float) word.getBBox().getCenterY(); - float strikethroughBoxHeight = (float) ((word.getHeight() * STRIKETHROUGH_ZONE) / 2); + float strikethroughCenterY = (float) word.getBBoxInitialUserSpace().getCenterY(); + float strikethroughBoxHeight = (float) ((word.getBBoxDirAdj().getHeight() * STRIKETHROUGH_ZONE) / 2); - float underlineCenterY = (float) (word.getDir().equals(TextDirection.ZERO) ? word.getBBox().getMinY() : word.getBBox().getMaxY()); - float underlineBoxHeight = (float) ((word.getHeight() * UNDERLINE_ZONE) / 2); + float underlineCenterY = (float) (word.getDir().equals(TextDirection.ZERO) ? word.getBBoxInitialUserSpace().getMinY() : word.getBBoxInitialUserSpace().getMaxY()); + float underlineBoxHeight = (float) ((word.getBBoxDirAdj().getHeight() * UNDERLINE_ZONE) / 2); float lowerY = Math.min(underlineCenterY - underlineBoxHeight, strikethroughCenterY - strikethroughBoxHeight); float upperY = Math.max(underlineCenterY + underlineBoxHeight, strikethroughCenterY + strikethroughBoxHeight); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java index 28f2b68..0d9fd8f 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java @@ -39,7 +39,7 @@ public class SearchTextWithTextPositionFactory { RedTextPosition currentTextPosition = sequences.get(0).getTextPositions() .get(0); - RedTextPosition previousTextPosition = RedTextPosition.builder().unicode(" ").directionAdjustedPosition(currentTextPosition.getDirectionAdjustedPosition()).build(); + RedTextPosition previousTextPosition = RedTextPosition.builder().unicode(" ").bBoxDirAdj(currentTextPosition.getBBoxDirAdj()).build(); for (TextPositionSequence word : sequences) { for (int i = 0; i < word.getTextPositions().size(); ++i) { @@ -61,7 +61,7 @@ public class SearchTextWithTextPositionFactory { ++context.positionIdx; } - previousTextPosition = RedTextPosition.builder().unicode(" ").directionAdjustedPosition(previousTextPosition.getDirectionAdjustedPosition()).build(); + previousTextPosition = RedTextPosition.builder().unicode(" ").bBoxDirAdj(previousTextPosition.getBBoxDirAdj()).build(); context.stringBuilder.append(" "); context.stringIdxToPositionIdx.add(context.positionIdx); ++context.stringIdx; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/MarkedContentUtils.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/MarkedContentUtils.java index 6c24f79..3e87eb4 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/MarkedContentUtils.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/MarkedContentUtils.java @@ -11,6 +11,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent; import org.apache.pdfbox.text.TextPosition; +import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; @@ -47,7 +48,7 @@ public class MarkedContentUtils { return markedContentByYPosition.values() .stream() - .map(textPositions -> new TextPositionSequence(textPositions, 0, true).getBBox()) + .map(textPositions -> new TextPositionSequence(textPositions, 0, true).getBBoxInitialUserSpace()) .map(t -> new Rectangle2D.Double(t.getX(), t.getY() - Math.abs(t.getHeight()), t.getWidth(), Math.abs(t.getHeight()))) .collect(Collectors.toList()); } @@ -89,7 +90,7 @@ public class MarkedContentUtils { .map(content -> (TextPosition) content) .filter(content -> !content.getUnicode().equals(" ")) .map(textPositions -> new TextPositionSequence(List.of(textPositions), 0, true)) - .map(TextPositionSequence::getBBox) + .map(BoundingBox::getBBoxInitialUserSpace) .collect(Collectors.toList()); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutparsingVisualizations.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutparsingVisualizations.java index 89f40b2..e89ef31 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutparsingVisualizations.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutparsingVisualizations.java @@ -76,6 +76,7 @@ public class LayoutparsingVisualizations { final Visualizations lines = Visualizations.builder().layer(ContentStreams.LINES).build(); final Visualizations zones = Visualizations.builder().layer(ContentStreams.ZONES).build(); final Visualizations mainBody = Visualizations.builder().layer(ContentStreams.MAIN_BODY).build(); + final Visualizations clean_rulings = Visualizations.builder().layer(ContentStreams.CLEAN_RULINGS).build(); final Visualizations rulings = Visualizations.builder().layer(ContentStreams.RULINGS).build(); final Visualizations cells = Visualizations.builder().layer(ContentStreams.CELLS).build(); final Visualizations markedContent = Visualizations.builder().layer(ContentStreams.MARKED_CONTENT).build(); @@ -94,6 +95,7 @@ public class LayoutparsingVisualizations { lines, // zones, // rulings, // + clean_rulings, // cells, // mainBody, // markedContent // @@ -120,11 +122,24 @@ public class LayoutparsingVisualizations { if (!active) { return; } - VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, this.rulings); + VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, this.clean_rulings); visualizationsOnPage.getColoredLines() .addAll(cleanRulings.buildAll() .stream() - .map(ruling -> new ColoredLine(ruling, decideOnRulingColor(ruling), 1)) + .map(ruling -> new ColoredLine(ruling, decideOnRulingColor(ruling), 0.5f)) + .toList()); + } + + public void addRulingVisualization(List rulings, int pageNumber) { + + if (!active) { + return; + } + VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, this.rulings); + visualizationsOnPage.getColoredLines() + .addAll(rulings + .stream() + .map(ruling -> new ColoredLine(ruling, decideOnRulingColor(ruling), 0.5f)) .toList()); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java index 2ea1fee..8763e37 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java @@ -34,7 +34,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest { @Test public void testLayoutParserEndToEnd() { - String filePath = "files/syngenta/CustomerFiles/54 Fludioxonil - EU AIR3 - Document E1 - Listing of Community and Member States MRLs.pdf"; + String filePath = "files/Minimal Examples/RotateTextWithRulingsTestFile.pdf"; runForFile(filePath); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java index 4cc0d16..ec7d002 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java @@ -29,7 +29,7 @@ class PageContentExtractorTest { textPositionPerPage.stream() .map(t -> t.getSortedTextPositionSequences() .stream() - .map(TextPositionSequence::getBBox) + .map(TextPositionSequence::getBBoxInitialUserSpace) .map(List::of) .toList()) .toList(), tmpFileName); diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/ContentStreams.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/ContentStreams.java index f35a95c..560da8a 100644 --- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/ContentStreams.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/ContentStreams.java @@ -26,6 +26,8 @@ public class ContentStreams { public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false); + public static Identifier CLEAN_RULINGS = new Identifier("Cleaned Rulings", COSName.getPDFName("KNECON_CLEAN_RULINGS"), true); + public static Identifier RULINGS = new Identifier("Rulings", COSName.getPDFName("KNECON_RULINGS"), true); public static Identifier WORDS = new Identifier("Words", COSName.getPDFName("KNECON_WORDS"), true); @@ -53,6 +55,7 @@ public class ContentStreams { ESCAPE_START, ESCAPE_END, RULINGS, + CLEAN_RULINGS, WORDS, ZONES, LINES,