RED-7851: add layoutgrid to new viewer document as optional content
This commit is contained in:
parent
810caa0624
commit
ea0af08c31
@ -19,6 +19,7 @@ public record LayoutParsingRequest(
|
|||||||
@NonNull String positionBlockFileStorageId,
|
@NonNull String positionBlockFileStorageId,
|
||||||
@NonNull String pageFileStorageId,
|
@NonNull String pageFileStorageId,
|
||||||
@NonNull String simplifiedTextStorageId,
|
@NonNull String simplifiedTextStorageId,
|
||||||
|
@NonNull String viewerDocumentStorageId,
|
||||||
@NonNull String sectionGridStorageId) {
|
@NonNull String sectionGridStorageId) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor;
|
|||||||
|
|
||||||
import static java.lang.String.format;
|
import static java.lang.String.format;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -44,6 +45,7 @@ import com.knecon.fforesight.service.layoutparser.processor.services.factory.Doc
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
|
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.parsing.PDFLinesTextStripper;
|
import com.knecon.fforesight.service.layoutparser.processor.services.parsing.PDFLinesTextStripper;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
@ -69,6 +71,8 @@ public class LayoutParsingPipeline {
|
|||||||
private final TaasBlockificationService taasBlockificationService;
|
private final TaasBlockificationService taasBlockificationService;
|
||||||
private final DocuMineBlockificationService docuMineBlockificationService;
|
private final DocuMineBlockificationService docuMineBlockificationService;
|
||||||
private final RedactManagerBlockificationService redactManagerBlockificationService;
|
private final RedactManagerBlockificationService redactManagerBlockificationService;
|
||||||
|
private final ViewerDocumentService viewerDocumentService;
|
||||||
|
|
||||||
|
|
||||||
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
||||||
|
|
||||||
@ -93,6 +97,12 @@ public class LayoutParsingPipeline {
|
|||||||
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
|
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
|
||||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
|
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
|
||||||
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
|
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
|
||||||
|
|
||||||
|
try (var out = new ByteArrayOutputStream()) {
|
||||||
|
viewerDocumentService.createViewerDocument(originDocument, documentGraph, out);
|
||||||
|
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, out);
|
||||||
|
}
|
||||||
|
|
||||||
if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) {
|
if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) {
|
||||||
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
|
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
|
||||||
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);
|
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);
|
||||||
@ -176,7 +186,6 @@ public class LayoutParsingPipeline {
|
|||||||
classificationPages.add(classificationPage);
|
classificationPages.add(classificationPage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bodyTextFrameService.setBodyTextFrames(classificationDocument, layoutParsingType);
|
bodyTextFrameService.setBodyTextFrames(classificationDocument, layoutParsingType);
|
||||||
|
|
||||||
switch (layoutParsingType) {
|
switch (layoutParsingType) {
|
||||||
@ -191,6 +200,7 @@ public class LayoutParsingPipeline {
|
|||||||
return classificationDocument;
|
return classificationDocument;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) {
|
private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) {
|
||||||
|
|
||||||
if (!classificationPage.isLandscape()) {
|
if (!classificationPage.isLandscape()) {
|
||||||
@ -221,6 +231,4 @@ public class LayoutParsingPipeline {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,7 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.processor;
|
package com.knecon.fforesight.service.layoutparser.processor;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -125,6 +127,7 @@ public class LayoutParsingStorageService {
|
|||||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.simplifiedTextStorageId(), simplifiedText);
|
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.simplifiedTextStorageId(), simplifiedText);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
private InputStream getObject(String storageId) {
|
private InputStream getObject(String storageId) {
|
||||||
|
|
||||||
@ -134,4 +137,14 @@ public class LayoutParsingStorageService {
|
|||||||
return Files.newInputStream(path, StandardOpenOption.DELETE_ON_CLOSE);
|
return Files.newInputStream(path, StandardOpenOption.DELETE_ON_CLOSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
public void storeViewerDocument(LayoutParsingRequest layoutParsingRequest, ByteArrayOutputStream out) {
|
||||||
|
|
||||||
|
try (var in = new ByteArrayInputStream(out.toByteArray())) {
|
||||||
|
|
||||||
|
storageService.storeObject(TenantContext.getTenantId(), layoutParsingRequest.viewerDocumentStorageId(), in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,8 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.model.visualization;
|
||||||
|
|
||||||
|
import java.awt.Color;
|
||||||
|
import java.awt.geom.Line2D;
|
||||||
|
|
||||||
|
/**
 * A line segment together with the color it is stroked in.
 *
 * @param line  geometry of the segment
 * @param color stroking color of the segment
 */
public record ColoredLine(Line2D line, Color color) {}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.model.visualization;
|
||||||
|
|
||||||
|
import java.awt.Color;
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
|
||||||
|
/**
 * A rectangle outline together with the color it is stroked in.
 *
 * @param rectangle2D geometry of the rectangle
 * @param color       stroking color of the outline
 */
public record ColoredRectangle(Rectangle2D rectangle2D, Color color) {}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.model.visualization;
|
||||||
|
|
||||||
|
import java.awt.Color;
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
|
||||||
|
/**
 * A rectangle that is filled with a color at a given opacity.
 *
 * @param rectangle2D geometry of the rectangle
 * @param color       fill (non-stroking) color
 * @param alpha       non-stroking alpha constant applied while filling
 */
public record FilledRectangle(Rectangle2D rectangle2D, Color color, float alpha) {}
|
||||||
@ -0,0 +1,27 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.model.visualization;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.experimental.FieldDefaults;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||||
|
public class LayoutGrid {
|
||||||
|
|
||||||
|
int numberOfPages;
|
||||||
|
List<VisualizationsOnPage> visualizationsPerPages;
|
||||||
|
|
||||||
|
|
||||||
|
public LayoutGrid(int numberOfPages) {
|
||||||
|
|
||||||
|
this.numberOfPages = numberOfPages;
|
||||||
|
this.visualizationsPerPages = new ArrayList<>(numberOfPages);
|
||||||
|
for (int i = 0; i < numberOfPages; i++) {
|
||||||
|
this.visualizationsPerPages.add(VisualizationsOnPage.builder().pageNumber(i).build());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.model.visualization;
|
||||||
|
|
||||||
|
import java.awt.geom.Point2D;
|
||||||
|
|
||||||
|
/**
 * A text label together with the point used as the origin of its text line.
 *
 * @param text      the string to show
 * @param lineStart anchor point the text matrix is positioned at
 */
public record PlacedText(String text, Point2D lineStart) {}
|
||||||
@ -0,0 +1,26 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.model.visualization;
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.experimental.FieldDefaults;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Builder
|
||||||
|
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||||
|
public class VisualizationsOnPage {
|
||||||
|
|
||||||
|
int pageNumber;
|
||||||
|
@Builder.Default
|
||||||
|
List<PlacedText> placedTexts = new LinkedList<>();
|
||||||
|
@Builder.Default
|
||||||
|
List<ColoredLine> coloredLines = new LinkedList<>();
|
||||||
|
@Builder.Default
|
||||||
|
List<ColoredRectangle> coloredRectangles = new LinkedList<>();
|
||||||
|
@Builder.Default
|
||||||
|
List<FilledRectangle> filledRectangles = new LinkedList<>();
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,232 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.services.visualization;
|
||||||
|
|
||||||
|
import static com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService.LINE_WIDTH;
|
||||||
|
|
||||||
|
import java.awt.Color;
|
||||||
|
import java.awt.geom.Line2D;
|
||||||
|
import java.awt.geom.Point2D;
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.awt.geom.RectangularShape;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredLine;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredRectangle;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.LayoutGrid;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.PlacedText;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class LayoutGridService {
|
||||||
|
|
||||||
|
private static final Color INNER_LINES_COLOR = new Color(255, 175, 175);
|
||||||
|
private static final Color PARAGRAPH_COLOR = new Color(70, 130, 180);
|
||||||
|
public static final Color TABLE_COLOR = new Color(102, 205, 170);
|
||||||
|
public static final Color SECTION_COLOR = new Color(23, 23, 23);
|
||||||
|
public static final Color HEADLINE_COLOR = new Color(162, 56, 56);
|
||||||
|
public static final Color HEADER_COLOR = new Color(171, 131, 6);
|
||||||
|
public static final Color IMAGE_COLOR = new Color(253, 63, 146);
|
||||||
|
|
||||||
|
|
||||||
|
public LayoutGrid createLayoutGrid(Document document) {
|
||||||
|
|
||||||
|
LayoutGrid layoutGrid = new LayoutGrid(document.getNumberOfPages());
|
||||||
|
document.streamAllSubNodes().forEach(semanticNode -> {
|
||||||
|
Color color = switch (semanticNode.getType()) {
|
||||||
|
case PARAGRAPH -> PARAGRAPH_COLOR;
|
||||||
|
case TABLE -> TABLE_COLOR;
|
||||||
|
case SECTION -> SECTION_COLOR;
|
||||||
|
case HEADLINE -> HEADLINE_COLOR;
|
||||||
|
case HEADER, FOOTER -> HEADER_COLOR;
|
||||||
|
case IMAGE -> IMAGE_COLOR;
|
||||||
|
default -> null;
|
||||||
|
};
|
||||||
|
if (isNotSectionOrTableCellOrDocument(semanticNode)) {
|
||||||
|
addAsRectangle(semanticNode, layoutGrid, color);
|
||||||
|
}
|
||||||
|
if (semanticNode.getType().equals(NodeType.SECTION)) {
|
||||||
|
addSection(semanticNode, layoutGrid, color);
|
||||||
|
}
|
||||||
|
if (semanticNode.getType().equals(NodeType.TABLE)) {
|
||||||
|
Table table = (Table) semanticNode;
|
||||||
|
addInnerTableLines(table, layoutGrid, INNER_LINES_COLOR);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return layoutGrid;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void addInnerTableLines(Table table, LayoutGrid layoutGrid, Color color) {
|
||||||
|
|
||||||
|
if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// only draw inner lines -> remove first values
|
||||||
|
List<Double> xs = table.streamRow(0).map(TableCell::getBBox).map(map -> map.values().stream().findAny().get()).map(RectangularShape::getMinX).collect(Collectors.toList());
|
||||||
|
xs.remove(0);
|
||||||
|
List<Double> ys = table.streamCol(0).map(TableCell::getBBox).map(map -> map.values().stream().findAny().get()).map(RectangularShape::getMaxY).collect(Collectors.toList());
|
||||||
|
ys.remove(0);
|
||||||
|
Rectangle2D tableBBox = table.getBBox().get(table.getFirstPage());
|
||||||
|
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(table.getFirstPage().getNumber() - 1).getColoredLines();
|
||||||
|
xs.forEach(x -> {
|
||||||
|
Line2D line = new Line2D.Double(new Point2D.Double(x, tableBBox.getMaxY()), new Point2D.Double(x, tableBBox.getMinY()));
|
||||||
|
coloredLines.add(new ColoredLine(line, color));
|
||||||
|
});
|
||||||
|
ys.forEach(y -> {
|
||||||
|
Line2D line = new Line2D.Double(new Point2D.Double(tableBBox.getMinX(), y), new Point2D.Double(tableBBox.getMaxX(), y));
|
||||||
|
coloredLines.add(new ColoredLine(line, color));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void addSection(SemanticNode semanticNode, LayoutGrid layoutGrid, Color color) {
|
||||||
|
|
||||||
|
Map<Page, Rectangle2D> bBoxMap = semanticNode.getBBox();
|
||||||
|
List<SemanticNode> subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION).toList();
|
||||||
|
if (!subSections.isEmpty()) {
|
||||||
|
Page firstPage = semanticNode.getFirstPage();
|
||||||
|
addPlacedText(firstPage, bBoxMap.get(firstPage), buildTreeIdString(semanticNode), layoutGrid);
|
||||||
|
} else {
|
||||||
|
bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, buildTreeIdString(semanticNode), layoutGrid)));
|
||||||
|
}
|
||||||
|
if (bBoxMap.values().size() == 1) {
|
||||||
|
Rectangle2D r = RectangleTransformations.pad(bBoxMap.values().stream().findFirst().get(), LINE_WIDTH, LINE_WIDTH);
|
||||||
|
int pageNumber = bBoxMap.keySet().stream().findFirst().get().getNumber() - 1;
|
||||||
|
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(pageNumber).getColoredLines();
|
||||||
|
List<Line2D> lines = createLinesFromRectangle(r);
|
||||||
|
// add string to top line
|
||||||
|
var firstLine = lines.remove(0);
|
||||||
|
coloredLines.add(new ColoredLine(firstLine, color));
|
||||||
|
for (Line2D line : lines) {
|
||||||
|
coloredLines.add(new ColoredLine(line, color));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
List<Page> pagesInOrder = bBoxMap.keySet().stream().sorted(Comparator.comparingInt(Page::getNumber)).collect(Collectors.toList());
|
||||||
|
var firstPage = pagesInOrder.remove(0);
|
||||||
|
addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid);
|
||||||
|
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
|
||||||
|
addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid);
|
||||||
|
for (Page middlePage : pagesInOrder) {
|
||||||
|
addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void addPlacedText(Page page, Rectangle2D textBBox, String s, LayoutGrid layoutGrid) {
|
||||||
|
|
||||||
|
var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts();
|
||||||
|
placedTexts.add(new PlacedText(s, new Point2D.Float((float) (textBBox.getMinX()), (float) textBBox.getMaxY())));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid) {
|
||||||
|
|
||||||
|
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(middlePage.getNumber() - 1).getColoredLines();
|
||||||
|
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(middlePage), LINE_WIDTH, LINE_WIDTH);
|
||||||
|
var midPageLines = createLinesFromRectangle(r);
|
||||||
|
// remove top line
|
||||||
|
midPageLines.remove(0);
|
||||||
|
// remove top line
|
||||||
|
midPageLines.remove(1);
|
||||||
|
// add string to left line
|
||||||
|
var leftLine = midPageLines.remove(1);
|
||||||
|
coloredLines.add(new ColoredLine(leftLine, color));
|
||||||
|
for (Line2D line : midPageLines) {
|
||||||
|
coloredLines.add(new ColoredLine(line, color));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid) {
|
||||||
|
|
||||||
|
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(lastPage.getNumber() - 1).getColoredLines();
|
||||||
|
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(lastPage), LINE_WIDTH, LINE_WIDTH);
|
||||||
|
var lastPageLines = createLinesFromRectangle(r);
|
||||||
|
// remove top line
|
||||||
|
lastPageLines.remove(0);
|
||||||
|
// add string to left line
|
||||||
|
var leftLine = lastPageLines.remove(2);
|
||||||
|
coloredLines.add(new ColoredLine(leftLine, color));
|
||||||
|
for (Line2D line : lastPageLines) {
|
||||||
|
coloredLines.add(new ColoredLine(line, color));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid) {
|
||||||
|
|
||||||
|
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines();
|
||||||
|
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(firstPage), LINE_WIDTH, LINE_WIDTH);
|
||||||
|
var firstPageLines = createLinesFromRectangle(r);
|
||||||
|
// remove bottom line
|
||||||
|
firstPageLines.remove(2);
|
||||||
|
// add string to top line
|
||||||
|
var firstLine = firstPageLines.remove(0);
|
||||||
|
coloredLines.add(new ColoredLine(firstLine, color));
|
||||||
|
for (Line2D line : firstPageLines) {
|
||||||
|
coloredLines.add(new ColoredLine(line, color));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private String buildTreeIdString(SemanticNode semanticNode) {
|
||||||
|
|
||||||
|
return semanticNode.getTreeId().stream().map(Object::toString).collect(Collectors.joining("."));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
A __________________ B
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
D|__________________| C
|
||||||
|
The returned List are the lines [AB, BC, DC, AD]
|
||||||
|
*/
|
||||||
|
private List<Line2D> createLinesFromRectangle(Rectangle2D r) {
|
||||||
|
// +0.5 to join the lines
|
||||||
|
List<Line2D> lines = new ArrayList<>(4);
|
||||||
|
float lineWidthCorrection = LINE_WIDTH * 0.5f;
|
||||||
|
Point2D.Float a = new Point2D.Float((float) r.getMinX(), (float) r.getMaxY());
|
||||||
|
Point2D.Float a1 = new Point2D.Float((float) r.getMinX() - lineWidthCorrection, (float) r.getMaxY());
|
||||||
|
Point2D.Float b = new Point2D.Float((float) r.getMaxX(), (float) r.getMaxY());
|
||||||
|
Point2D.Float b1 = new Point2D.Float((float) r.getMaxX() + lineWidthCorrection, (float) r.getMaxY());
|
||||||
|
Point2D.Float c = new Point2D.Float((float) r.getMaxX(), (float) r.getMinY());
|
||||||
|
Point2D.Float c1 = new Point2D.Float((float) r.getMaxX() + lineWidthCorrection, (float) r.getMinY());
|
||||||
|
Point2D.Float d = new Point2D.Float((float) r.getMinX(), (float) r.getMinY());
|
||||||
|
Point2D.Float d1 = new Point2D.Float((float) r.getMinX() - lineWidthCorrection, (float) r.getMinY());
|
||||||
|
lines.add(new Line2D.Float(a1, b1));
|
||||||
|
lines.add(new Line2D.Float(b, c));
|
||||||
|
lines.add(new Line2D.Float(d1, c1));
|
||||||
|
lines.add(new Line2D.Float(a, d));
|
||||||
|
return lines;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static boolean isNotSectionOrTableCellOrDocument(SemanticNode semanticNode) {
|
||||||
|
|
||||||
|
return !(semanticNode.getType().equals(NodeType.DOCUMENT) || semanticNode.getType().equals(NodeType.SECTION) || semanticNode.getType().equals(NodeType.TABLE_CELL));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void addAsRectangle(SemanticNode semanticNode, LayoutGrid layoutGrid, Color color) {
|
||||||
|
|
||||||
|
semanticNode.getBBox()
|
||||||
|
.forEach((page, textBBox) -> layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getColoredRectangles().add(new ColoredRectangle(textBBox, color)));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,180 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.services.visualization;
|
||||||
|
|
||||||
|
import java.awt.geom.AffineTransform;
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.cos.COSDictionary;
|
||||||
|
import org.apache.pdfbox.cos.COSName;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDResources;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
|
||||||
|
import org.apache.pdfbox.util.Matrix;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredLine;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredRectangle;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.FilledRectangle;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.LayoutGrid;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.PlacedText;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.visualization.VisualizationsOnPage;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ViewerDocumentService {
|
||||||
|
|
||||||
|
private static final String layerName = "Layout grid";
|
||||||
|
|
||||||
|
private static final int FONT_SIZE = 10;
|
||||||
|
public static final float LINE_WIDTH = 1.5f;
|
||||||
|
|
||||||
|
private final LayoutGridService layoutGridService;
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
public void createViewerDocument(PDDocument pdDocument, Document document, OutputStream outputStream) {
|
||||||
|
|
||||||
|
log.info("Start Viewer Document Creation");
|
||||||
|
LayoutGrid layoutGrid = layoutGridService.createLayoutGrid(document);
|
||||||
|
log.info("Created Layout Grid");
|
||||||
|
// PDDocument.save() is very slow, since it actually traverses the entire pdf and writes a new one.
|
||||||
|
// If we collect all COSDictionaries we changed and tell it explicitly to only add the changed ones it's very fast.
|
||||||
|
Set<COSDictionary> dictionariesToUpdate = new HashSet<>();
|
||||||
|
PDOptionalContentGroup layer = addLayerToDocument(pdDocument, dictionariesToUpdate);
|
||||||
|
PDFont font = PDType1Font.HELVETICA;
|
||||||
|
|
||||||
|
for (int pageNumber = 0; pageNumber < pdDocument.getNumberOfPages(); pageNumber++) {
|
||||||
|
PDPage pdPage = pdDocument.getPage(pageNumber);
|
||||||
|
|
||||||
|
AffineTransform textDeRotationMatrix = getTextDeRotationTransform(pdPage);
|
||||||
|
addLayerToPageRessources(pdPage);
|
||||||
|
|
||||||
|
// We need to save the graphics state before, such that our appended content cannot be affected by previous content streams with side effects,
|
||||||
|
// e.g. not escaped matrix transformations.
|
||||||
|
escapePreviousContents(pdDocument, pdPage);
|
||||||
|
|
||||||
|
VisualizationsOnPage visualizationsOnPage = layoutGrid.getVisualizationsPerPages().get(pageNumber);
|
||||||
|
assert pageNumber == visualizationsOnPage.getPageNumber();
|
||||||
|
// We need to append to the content stream, otherwise the content could be overlapped by following content.
|
||||||
|
try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, true)) {
|
||||||
|
|
||||||
|
contentStream.beginMarkedContent(COSName.OC, layer);
|
||||||
|
contentStream.saveGraphicsState();
|
||||||
|
|
||||||
|
contentStream.setLineWidth(LINE_WIDTH);
|
||||||
|
for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) {
|
||||||
|
contentStream.setStrokingColor(coloredLine.color());
|
||||||
|
contentStream.moveTo((float) coloredLine.line().getX1(), (float) coloredLine.line().getY1());
|
||||||
|
contentStream.lineTo((float) coloredLine.line().getX2(), (float) coloredLine.line().getY2());
|
||||||
|
contentStream.stroke();
|
||||||
|
}
|
||||||
|
for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) {
|
||||||
|
contentStream.setStrokingColor(coloredRectangle.color());
|
||||||
|
Rectangle2D r = coloredRectangle.rectangle2D();
|
||||||
|
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
|
||||||
|
contentStream.stroke();
|
||||||
|
}
|
||||||
|
for (FilledRectangle filledRectangle : visualizationsOnPage.getFilledRectangles()) {
|
||||||
|
contentStream.setNonStrokingColor(filledRectangle.color());
|
||||||
|
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
|
||||||
|
graphicsState.setNonStrokingAlphaConstant(filledRectangle.alpha());
|
||||||
|
contentStream.setGraphicsStateParameters(graphicsState);
|
||||||
|
Rectangle2D r = filledRectangle.rectangle2D();
|
||||||
|
contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight());
|
||||||
|
contentStream.fill();
|
||||||
|
}
|
||||||
|
for (PlacedText placedText : visualizationsOnPage.getPlacedTexts()) {
|
||||||
|
contentStream.setFont(font, FONT_SIZE);
|
||||||
|
contentStream.beginText();
|
||||||
|
Matrix textMatrix = new Matrix((float) textDeRotationMatrix.getScaleX(),
|
||||||
|
(float) textDeRotationMatrix.getShearX(),
|
||||||
|
(float) textDeRotationMatrix.getShearY(),
|
||||||
|
(float) textDeRotationMatrix.getScaleY(),
|
||||||
|
(float) placedText.lineStart().getX(),
|
||||||
|
(float) placedText.lineStart().getY());
|
||||||
|
textMatrix.translate(-((font.getStringWidth(placedText.text()) / 1000) * FONT_SIZE + (2 * LINE_WIDTH) + 4), -FONT_SIZE);
|
||||||
|
contentStream.setTextMatrix(textMatrix);
|
||||||
|
contentStream.showText(placedText.text());
|
||||||
|
contentStream.endText();
|
||||||
|
}
|
||||||
|
contentStream.restoreGraphicsState();
|
||||||
|
contentStream.endMarkedContent();
|
||||||
|
}
|
||||||
|
dictionariesToUpdate.add(pdPage.getCOSObject());
|
||||||
|
dictionariesToUpdate.add(pdPage.getResources().getCOSObject());
|
||||||
|
}
|
||||||
|
log.info("Written Layoutgrid to pdf streams");
|
||||||
|
pdDocument.saveIncremental(outputStream, dictionariesToUpdate);
|
||||||
|
log.info("Saved Viewer Document");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void addLayerToPageRessources(PDPage pdPage) {
|
||||||
|
|
||||||
|
PDResources resources = pdPage.getResources();
|
||||||
|
if (resources == null) {
|
||||||
|
resources = new PDResources();
|
||||||
|
pdPage.setResources(resources);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void escapePreviousContents(PDDocument pdDocument, PDPage pdPage) throws IOException {
|
||||||
|
|
||||||
|
try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.PREPEND, false)) {
|
||||||
|
contentStream.saveGraphicsState();
|
||||||
|
}
|
||||||
|
try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, false)) {
|
||||||
|
contentStream.restoreGraphicsState();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static PDOptionalContentGroup addLayerToDocument(PDDocument pdDocument, Set<COSDictionary> dictionariesToUpdate) {
|
||||||
|
|
||||||
|
PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
|
||||||
|
PDOptionalContentProperties ocprops = catalog.getOCProperties();
|
||||||
|
if (ocprops == null) {
|
||||||
|
ocprops = new PDOptionalContentProperties();
|
||||||
|
catalog.setOCProperties(ocprops);
|
||||||
|
}
|
||||||
|
PDOptionalContentGroup layer = null;
|
||||||
|
if (ocprops.hasGroup(layerName)) {
|
||||||
|
layer = ocprops.getGroup(layerName);
|
||||||
|
} else {
|
||||||
|
layer = new PDOptionalContentGroup(layerName);
|
||||||
|
ocprops.addGroup(layer);
|
||||||
|
}
|
||||||
|
ocprops.setGroupEnabled(layer, true);
|
||||||
|
dictionariesToUpdate.add(catalog.getCOSObject());
|
||||||
|
return layer;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static AffineTransform getTextDeRotationTransform(PDPage page) {
|
||||||
|
|
||||||
|
return AffineTransform.getQuadrantRotateInstance(switch (page.getRotation()) {
|
||||||
|
case 90 -> 3;
|
||||||
|
case 180 -> 2;
|
||||||
|
case 270 -> 1;
|
||||||
|
default -> 0;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -13,7 +13,7 @@ plugins {
|
|||||||
description = "layoutparser-service-server"
|
description = "layoutparser-service-server"
|
||||||
|
|
||||||
val jacksonVersion = "2.15.2"
|
val jacksonVersion = "2.15.2"
|
||||||
val pdfBoxVersion = "3.0.0-alpha2"
|
val pdfBoxVersion = "3.0.0-RC1"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation(project(":layoutparser-service-processor"))
|
implementation(project(":layoutparser-service-processor"))
|
||||||
|
|||||||
@ -47,7 +47,10 @@ public class BdrJsonBuildTest extends AbstractTest {
|
|||||||
|
|
||||||
try (InputStream inputStream = new FileInputStream(filename)) {
|
try (InputStream inputStream = new FileInputStream(filename)) {
|
||||||
try (PDDocument pdDocument = Loader.loadPDF(inputStream)) {
|
try (PDDocument pdDocument = Loader.loadPDF(inputStream)) {
|
||||||
return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.TAAS, pdDocument, new ImageServiceResponse(), new TableServiceResponse()));
|
return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.TAAS,
|
||||||
|
pdDocument,
|
||||||
|
new ImageServiceResponse(),
|
||||||
|
new TableServiceResponse()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -30,10 +30,10 @@ import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
|||||||
import com.iqser.red.storage.commons.service.StorageService;
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||||
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
||||||
|
|
||||||
|
|||||||
@ -40,6 +40,13 @@ public class BuildDocumentGraphTest extends AbstractTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
protected Document buildGraph(String filename) {
|
protected Document buildGraph(String filename) {
|
||||||
|
|
||||||
|
return buildGraph(filename, LayoutParsingType.REDACT_MANAGER);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
protected Document buildGraph(String filename, LayoutParsingType layoutParsingType) {
|
||||||
|
|
||||||
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
|
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
|
||||||
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||||
} else {
|
} else {
|
||||||
@ -48,7 +55,7 @@ public class BuildDocumentGraphTest extends AbstractTest {
|
|||||||
ClassPathResource fileResource = new ClassPathResource(filename);
|
ClassPathResource fileResource = new ClassPathResource(filename);
|
||||||
|
|
||||||
try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) {
|
try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) {
|
||||||
return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE,
|
return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(layoutParsingType,
|
||||||
pdDocument,
|
pdDocument,
|
||||||
layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
|
layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
|
||||||
new TableServiceResponse()));
|
new TableServiceResponse()));
|
||||||
|
|||||||
@ -15,9 +15,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||||
|
|
||||||
|
|||||||
@ -16,12 +16,14 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.textbloc
|
|||||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
|
public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@Disabled
|
// @Disabled
|
||||||
public void visualizeMetolachlor() {
|
public void visualizeMetolachlor() {
|
||||||
|
|
||||||
String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||||
@ -67,9 +69,12 @@ public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
|
|||||||
try (var fileStream = fileResource.getInputStream();//
|
try (var fileStream = fileResource.getInputStream();//
|
||||||
PDDocument pdDocument = Loader.loadPDF(fileStream)//
|
PDDocument pdDocument = Loader.loadPDF(fileStream)//
|
||||||
) {
|
) {
|
||||||
|
log.info("drawing document");
|
||||||
PdfDraw.drawDocumentGraph(pdDocument, documentGraph);
|
PdfDraw.drawDocumentGraph(pdDocument, documentGraph);
|
||||||
PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
||||||
|
log.info("saving document");
|
||||||
pdDocument.save(tmpFile);
|
pdDocument.save(tmpFile);
|
||||||
|
log.info("saved document");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,33 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||||
|
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.Loader;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.springframework.core.io.ClassPathResource;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
|
||||||
|
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
|
public class ViewerDocumentTest extends BuildDocumentGraphTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@SneakyThrows
|
||||||
|
public void testViewerDocument() {
|
||||||
|
|
||||||
|
LayoutGridService layoutGridService = new LayoutGridService();
|
||||||
|
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
|
||||||
|
String fileName = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||||
|
Document document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER);
|
||||||
|
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||||
|
try (var pdDocument = Loader.loadPDF(new ClassPathResource(fileName).getInputStream()); var out = new FileOutputStream(tmpFileName)) {
|
||||||
|
viewerDocumentService.createViewerDocument(pdDocument, document, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -54,6 +54,7 @@ public abstract class AbstractTest {
|
|||||||
protected final static String PAGES_FILE_ID = "pages";
|
protected final static String PAGES_FILE_ID = "pages";
|
||||||
protected final static String TENANT_ID = "tenant";
|
protected final static String TENANT_ID = "tenant";
|
||||||
protected final static String SECTION_GRID_ID = "section";
|
protected final static String SECTION_GRID_ID = "section";
|
||||||
|
protected final static String VIEWER_DOCUMENT_ID = "viewer";
|
||||||
protected final static String SIMPLIFIED_ID = "simplified";
|
protected final static String SIMPLIFIED_ID = "simplified";
|
||||||
|
|
||||||
|
|
||||||
@ -70,6 +71,7 @@ public abstract class AbstractTest {
|
|||||||
.pageFileStorageId(PAGES_FILE_ID)
|
.pageFileStorageId(PAGES_FILE_ID)
|
||||||
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
||||||
.sectionGridStorageId(SECTION_GRID_ID)
|
.sectionGridStorageId(SECTION_GRID_ID)
|
||||||
|
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,6 +112,7 @@ public abstract class AbstractTest {
|
|||||||
.pageFileStorageId(PAGES_FILE_ID)
|
.pageFileStorageId(PAGES_FILE_ID)
|
||||||
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
||||||
.sectionGridStorageId(SECTION_GRID_ID)
|
.sectionGridStorageId(SECTION_GRID_ID)
|
||||||
|
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,6 +146,7 @@ public abstract class AbstractTest {
|
|||||||
.pageFileStorageId(PAGES_FILE_ID)
|
.pageFileStorageId(PAGES_FILE_ID)
|
||||||
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
||||||
.sectionGridStorageId(SECTION_GRID_ID)
|
.sectionGridStorageId(SECTION_GRID_ID)
|
||||||
|
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -14,7 +14,6 @@ import org.apache.pdfbox.pdmodel.PDDocument;
|
|||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
|
||||||
import org.apache.pdfbox.util.Matrix;
|
import org.apache.pdfbox.util.Matrix;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
|
|
||||||
@ -144,7 +143,7 @@ public class PdfDraw {
|
|||||||
} else {
|
} else {
|
||||||
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY());
|
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY());
|
||||||
}
|
}
|
||||||
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 10);
|
contentStream.setFont(PDType1Font.HELVETICA, 10);
|
||||||
contentStream.showText(string);
|
contentStream.showText(string);
|
||||||
contentStream.endText();
|
contentStream.endText();
|
||||||
contentStream.close();
|
contentStream.close();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user