Merge branch 'RED-7158' into 'main'

RED-7158: fix for all page rotations

See merge request fforesight/layout-parser!35
This commit is contained in:
Kilian Schüttler 2023-08-15 15:07:04 +02:00
commit 457f7d9c66
2 changed files with 82 additions and 31 deletions

View File

@ -8,10 +8,13 @@ import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D; import java.awt.geom.Rectangle2D;
import java.awt.geom.RectangularShape; import java.awt.geom.RectangularShape;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@ -33,7 +36,7 @@ public class LayoutGridService {
private static final Color INNER_LINES_COLOR = new Color(255, 175, 175); private static final Color INNER_LINES_COLOR = new Color(255, 175, 175);
private static final Color PARAGRAPH_COLOR = new Color(70, 130, 180); private static final Color PARAGRAPH_COLOR = new Color(70, 130, 180);
public static final Color TABLE_COLOR = new Color(102, 205, 170); public static final Color TABLE_COLOR = new Color(102, 205, 170);
public static final Color SECTION_COLOR = new Color(23, 23, 23); public static final Color SECTION_COLOR = new Color(50, 50, 50);
public static final Color HEADLINE_COLOR = new Color(162, 56, 56); public static final Color HEADLINE_COLOR = new Color(162, 56, 56);
public static final Color HEADER_COLOR = new Color(171, 131, 6); public static final Color HEADER_COLOR = new Color(171, 131, 6);
public static final Color IMAGE_COLOR = new Color(253, 63, 146); public static final Color IMAGE_COLOR = new Color(253, 63, 146);
@ -60,7 +63,7 @@ public class LayoutGridService {
} }
if (semanticNode.getType().equals(NodeType.TABLE)) { if (semanticNode.getType().equals(NodeType.TABLE)) {
Table table = (Table) semanticNode; Table table = (Table) semanticNode;
addInnerTableLines(table, layoutGrid, INNER_LINES_COLOR); addInnerTableLines(table, layoutGrid);
} }
}); });
@ -68,26 +71,51 @@ public class LayoutGridService {
} }
private void addInnerTableLines(Table table, LayoutGrid layoutGrid, Color color) { private void addInnerTableLines(Table table, LayoutGrid layoutGrid) {
if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) { if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) {
return; return;
} }
// only draw inner lines -> remove first values for (Page page : table.getPages()) {
List<Double> xs = table.streamRow(0).map(TableCell::getBBox).map(map -> map.values().stream().findAny().get()).map(RectangularShape::getMinX).collect(Collectors.toList()); Optional<Integer> optionalFirstRowOnPage = table.streamCol(0).filter(tableCell -> tableCell.isOnPage(page.getNumber())).map(TableCell::getRow).findFirst();
xs.remove(0); if (optionalFirstRowOnPage.isEmpty()) {
List<Double> ys = table.streamCol(0).map(TableCell::getBBox).map(map -> map.values().stream().findAny().get()).map(RectangularShape::getMaxY).collect(Collectors.toList()); continue;
ys.remove(0); }
Rectangle2D tableBBox = table.getBBox().get(table.getFirstPage()); int firstRowOnPage = optionalFirstRowOnPage.get();
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(table.getFirstPage().getNumber() - 1).getColoredLines(); Stream<Double> xStream = switch (page.getRotation()) {
xs.forEach(x -> { case 90 -> streamBBoxOfCellsOnPage(table.streamCol(0), page).map(RectangularShape::getMinX);
Line2D line = new Line2D.Double(new Point2D.Double(x, tableBBox.getMaxY()), new Point2D.Double(x, tableBBox.getMinY())); case 180 -> streamBBoxOfCellsOnPage(table.streamRow(firstRowOnPage), page).map(RectangularShape::getMaxX);
coloredLines.add(new ColoredLine(line, color)); case 270 -> streamBBoxOfCellsOnPage(table.streamCol(0), page).map(RectangularShape::getMaxX);
}); default -> streamBBoxOfCellsOnPage(table.streamRow(firstRowOnPage), page).map(RectangularShape::getMinX);
ys.forEach(y -> { };
Line2D line = new Line2D.Double(new Point2D.Double(tableBBox.getMinX(), y), new Point2D.Double(tableBBox.getMaxX(), y)); List<Double> xs = xStream.collect(Collectors.toList());
coloredLines.add(new ColoredLine(line, color)); xs.remove(0);
}); Stream<Double> yStream = switch (page.getRotation()) {
case 90 -> streamBBoxOfCellsOnPage(table.streamRow(firstRowOnPage), page).map(RectangularShape::getMinY);
case 180 -> streamBBoxOfCellsOnPage(table.streamCol(0), page).map(RectangularShape::getMinY);
case 270 -> streamBBoxOfCellsOnPage(table.streamRow(firstRowOnPage), page).map(RectangularShape::getMaxY);
default -> streamBBoxOfCellsOnPage(table.streamCol(0), page).map(RectangularShape::getMaxY);
};
List<Double> ys = yStream.collect(Collectors.toList());
ys.remove(0);
Rectangle2D tableBBox = table.getBBox().get(table.getFirstPage());
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getColoredLines();
xs.forEach(x -> {
Line2D line = new Line2D.Double(new Point2D.Double(x, tableBBox.getMaxY()), new Point2D.Double(x, tableBBox.getMinY()));
coloredLines.add(new ColoredLine(line, INNER_LINES_COLOR));
});
ys.forEach(y -> {
Line2D line = new Line2D.Double(new Point2D.Double(tableBBox.getMinX(), y), new Point2D.Double(tableBBox.getMaxX(), y));
coloredLines.add(new ColoredLine(line, INNER_LINES_COLOR));
});
}
}
private static Stream<Rectangle2D> streamBBoxOfCellsOnPage(Stream<TableCell> table, Page page) {
return table.filter(tableCell -> tableCell.isOnPage(page.getNumber())).map(TableCell::getBBox).map(bBoxMap -> bBoxMap.get(page));
} }
@ -95,17 +123,16 @@ public class LayoutGridService {
Map<Page, Rectangle2D> bBoxMap = semanticNode.getBBox(); Map<Page, Rectangle2D> bBoxMap = semanticNode.getBBox();
List<SemanticNode> subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION).toList(); List<SemanticNode> subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION).toList();
Page firstPage = semanticNode.getFirstPage();
if (!subSections.isEmpty()) { if (!subSections.isEmpty()) {
Page firstPage = semanticNode.getFirstPage();
addPlacedText(firstPage, bBoxMap.get(firstPage), buildTreeIdString(semanticNode), layoutGrid); addPlacedText(firstPage, bBoxMap.get(firstPage), buildTreeIdString(semanticNode), layoutGrid);
} else { } else {
bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, buildTreeIdString(semanticNode), layoutGrid))); bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, buildTreeIdString(semanticNode), layoutGrid)));
} }
if (bBoxMap.values().size() == 1) { if (bBoxMap.values().size() == 1) {
Rectangle2D r = RectangleTransformations.pad(bBoxMap.values().stream().findFirst().get(), LINE_WIDTH, LINE_WIDTH); Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH, LINE_WIDTH);
int pageNumber = bBoxMap.keySet().stream().findFirst().get().getNumber() - 1; List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines();
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(pageNumber).getColoredLines(); List<Line2D> lines = createLinesFromRectangle(r, firstPage.getRotation());
List<Line2D> lines = createLinesFromRectangle(r);
// add string to top line // add string to top line
var firstLine = lines.remove(0); var firstLine = lines.remove(0);
coloredLines.add(new ColoredLine(firstLine, color)); coloredLines.add(new ColoredLine(firstLine, color));
@ -115,7 +142,7 @@ public class LayoutGridService {
return; return;
} }
List<Page> pagesInOrder = bBoxMap.keySet().stream().sorted(Comparator.comparingInt(Page::getNumber)).collect(Collectors.toList()); List<Page> pagesInOrder = bBoxMap.keySet().stream().sorted(Comparator.comparingInt(Page::getNumber)).collect(Collectors.toList());
var firstPage = pagesInOrder.remove(0); pagesInOrder.remove(0);
addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid); addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid);
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1); var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid); addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid);
@ -127,8 +154,14 @@ public class LayoutGridService {
private void addPlacedText(Page page, Rectangle2D textBBox, String s, LayoutGrid layoutGrid) { private void addPlacedText(Page page, Rectangle2D textBBox, String s, LayoutGrid layoutGrid) {
Point2D.Float upperLeftCorner = switch (page.getRotation()) {
case 90 -> new Point2D.Float((float) (textBBox.getMinX()), (float) textBBox.getMinY());
case 180 -> new Point2D.Float((float) (textBBox.getMaxX()), (float) textBBox.getMinY());
case 270 -> new Point2D.Float((float) (textBBox.getMaxX()), (float) textBBox.getMaxY());
default -> new Point2D.Float((float) (textBBox.getMinX()), (float) textBBox.getMaxY());
};
var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts(); var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts();
placedTexts.add(new PlacedText(s, new Point2D.Float((float) (textBBox.getMinX()), (float) textBBox.getMaxY()))); placedTexts.add(new PlacedText(s, upperLeftCorner));
} }
@ -136,7 +169,7 @@ public class LayoutGridService {
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(middlePage.getNumber() - 1).getColoredLines(); List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(middlePage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(middlePage), LINE_WIDTH, LINE_WIDTH); Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(middlePage), LINE_WIDTH, LINE_WIDTH);
var midPageLines = createLinesFromRectangle(r); var midPageLines = createLinesFromRectangle(r, middlePage.getRotation());
// remove top line // remove top line
midPageLines.remove(0); midPageLines.remove(0);
// remove top line // remove top line
@ -154,7 +187,7 @@ public class LayoutGridService {
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(lastPage.getNumber() - 1).getColoredLines(); List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(lastPage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(lastPage), LINE_WIDTH, LINE_WIDTH); Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(lastPage), LINE_WIDTH, LINE_WIDTH);
var lastPageLines = createLinesFromRectangle(r); var lastPageLines = createLinesFromRectangle(r, lastPage.getRotation());
// remove top line // remove top line
lastPageLines.remove(0); lastPageLines.remove(0);
// add string to left line // add string to left line
@ -170,7 +203,7 @@ public class LayoutGridService {
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines(); List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(firstPage), LINE_WIDTH, LINE_WIDTH); Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(firstPage), LINE_WIDTH, LINE_WIDTH);
var firstPageLines = createLinesFromRectangle(r); var firstPageLines = createLinesFromRectangle(r, firstPage.getRotation());
// remove bottom line // remove bottom line
firstPageLines.remove(2); firstPageLines.remove(2);
// add string to top line // add string to top line
@ -196,8 +229,9 @@ public class LayoutGridService {
| | | |
D|__________________| C D|__________________| C
The returned List are the lines [AB, BC, DC, AD] The returned List are the lines [AB, BC, DC, AD]
The list is reordered so that the returned lines are always in the order in which they appear on the page as viewed.
*/ */
private List<Line2D> createLinesFromRectangle(Rectangle2D r) { private List<Line2D> createLinesFromRectangle(Rectangle2D r, int pageRotation) {
// +0.5 to join the lines // +0.5 to join the lines
List<Line2D> lines = new ArrayList<>(4); List<Line2D> lines = new ArrayList<>(4);
float lineWidthCorrection = LINE_WIDTH * 0.5f; float lineWidthCorrection = LINE_WIDTH * 0.5f;
@ -213,7 +247,23 @@ public class LayoutGridService {
lines.add(new Line2D.Float(b, c)); lines.add(new Line2D.Float(b, c));
lines.add(new Line2D.Float(d1, c1)); lines.add(new Line2D.Float(d1, c1));
lines.add(new Line2D.Float(a, d)); lines.add(new Line2D.Float(a, d));
return lines;
return switch (pageRotation) {
case 90 -> {
Collections.rotate(lines, 1);
yield lines;
}
case 180 -> {
Collections.rotate(lines, 2);
yield lines;
}
case 270 -> {
Collections.rotate(lines, 3);
yield lines;
}
default -> lines;
};
} }

View File

@ -42,7 +42,7 @@ public class ViewerDocumentService {
private static final String layerName = "Layout grid"; private static final String layerName = "Layout grid";
private static final int FONT_SIZE = 10; private static final int FONT_SIZE = 10;
public static final float LINE_WIDTH = 1.5f; public static final float LINE_WIDTH = 1f;
private final LayoutGridService layoutGridService; private final LayoutGridService layoutGridService;
@ -118,6 +118,7 @@ public class ViewerDocumentService {
dictionariesToUpdate.add(pdPage.getCOSObject()); dictionariesToUpdate.add(pdPage.getCOSObject());
dictionariesToUpdate.add(pdPage.getResources().getCOSObject()); dictionariesToUpdate.add(pdPage.getResources().getCOSObject());
} }
dictionariesToUpdate.add(pdDocument.getDocumentInformation().getCOSObject());
pdDocument.saveIncremental(outputStream, dictionariesToUpdate); pdDocument.saveIncremental(outputStream, dictionariesToUpdate);
log.info("Saved Viewer Document"); log.info("Saved Viewer Document");
} }