RED-8825: improve layoutparsing
* added improved debugging capabilities to viewer-doc
* refactored coordinates (wip)
* refactored line intersection algorithm
* removed cropbox correction from pdfbox text positions
This commit is contained in:
parent
6fb1a0bef3
commit
3dd215288a
@ -101,29 +101,33 @@ public class LayoutParsingPipeline {
|
||||
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
|
||||
|
||||
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
|
||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
|
||||
.orElse(originFile);
|
||||
|
||||
VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
|
||||
if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) {
|
||||
if (layoutParsingRequest.visualLayoutParsingFileId()
|
||||
.isPresent()) {
|
||||
visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get());
|
||||
}
|
||||
|
||||
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
||||
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
|
||||
if (layoutParsingRequest.imagesFileStorageId()
|
||||
.isPresent()) {
|
||||
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
|
||||
}
|
||||
|
||||
TableServiceResponse tableServiceResponse = new TableServiceResponse();
|
||||
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
|
||||
if (layoutParsingRequest.tablesFileStorageId()
|
||||
.isPresent()) {
|
||||
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
|
||||
}
|
||||
|
||||
ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(),
|
||||
originFile,
|
||||
imageServiceResponse,
|
||||
tableServiceResponse,
|
||||
visualLayoutParsingResponse,
|
||||
layoutParsingRequest.identifier());
|
||||
originFile,
|
||||
imageServiceResponse,
|
||||
tableServiceResponse,
|
||||
visualLayoutParsingResponse,
|
||||
layoutParsingRequest.identifier());
|
||||
|
||||
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
||||
|
||||
@ -155,25 +159,25 @@ public class LayoutParsingPipeline {
|
||||
.numberOfPages(documentGraph.getNumberOfPages())
|
||||
.duration(System.currentTimeMillis() - start)
|
||||
.message(format("""
|
||||
Layout parsing has finished in %.02f s.
|
||||
identifiers: %s
|
||||
%s
|
||||
Files have been saved with Ids:
|
||||
Structure: %s
|
||||
Text: %s
|
||||
Positions: %s
|
||||
PageData: %s
|
||||
Simplified Text: %s
|
||||
Viewer Doc: %s""",
|
||||
((float) (System.currentTimeMillis() - start)) / 1000,
|
||||
layoutParsingRequest.identifier(),
|
||||
buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
|
||||
layoutParsingRequest.structureFileStorageId(),
|
||||
layoutParsingRequest.textBlockFileStorageId(),
|
||||
layoutParsingRequest.positionBlockFileStorageId(),
|
||||
layoutParsingRequest.pageFileStorageId(),
|
||||
layoutParsingRequest.simplifiedTextStorageId(),
|
||||
layoutParsingRequest.viewerDocumentStorageId()))
|
||||
Layout parsing has finished in %.02f s.
|
||||
identifiers: %s
|
||||
%s
|
||||
Files have been saved with Ids:
|
||||
Structure: %s
|
||||
Text: %s
|
||||
Positions: %s
|
||||
PageData: %s
|
||||
Simplified Text: %s
|
||||
Viewer Doc: %s""",
|
||||
((float) (System.currentTimeMillis() - start)) / 1000,
|
||||
layoutParsingRequest.identifier(),
|
||||
buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
|
||||
layoutParsingRequest.structureFileStorageId(),
|
||||
layoutParsingRequest.textBlockFileStorageId(),
|
||||
layoutParsingRequest.positionBlockFileStorageId(),
|
||||
layoutParsingRequest.pageFileStorageId(),
|
||||
layoutParsingRequest.simplifiedTextStorageId(),
|
||||
layoutParsingRequest.viewerDocumentStorageId()))
|
||||
.build();
|
||||
|
||||
}
|
||||
@ -194,14 +198,14 @@ public class LayoutParsingPipeline {
|
||||
private String buildSemanticNodeCountMessage(int numberOfPages, Map<NodeType, Long> semanticNodeCounts) {
|
||||
|
||||
return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
|
||||
numberOfPages,
|
||||
semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
|
||||
semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
|
||||
semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
|
||||
semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
|
||||
semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
|
||||
semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
|
||||
semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
|
||||
numberOfPages,
|
||||
semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
|
||||
semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
|
||||
semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
|
||||
semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
|
||||
semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
|
||||
semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
|
||||
semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
|
||||
}
|
||||
|
||||
|
||||
@ -220,6 +224,9 @@ public class LayoutParsingPipeline {
|
||||
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
||||
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
||||
ClassificationDocument classificationDocument = new ClassificationDocument();
|
||||
|
||||
classificationDocument.getVisualizations().setActive(identifier.containsKey("debug"));
|
||||
|
||||
List<ClassificationPage> classificationPages = new ArrayList<>();
|
||||
|
||||
long pageCount = originDocument.getNumberOfPages();
|
||||
@ -249,6 +256,8 @@ public class LayoutParsingPipeline {
|
||||
}
|
||||
stripper.getText(originDocument);
|
||||
|
||||
classificationDocument.getVisualizations().addTextVisualizations(stripper.getTextPositionSequences(), pageNumber);
|
||||
|
||||
PDRectangle pdr = pdPage.getMediaBox();
|
||||
|
||||
int rotation = pdPage.getRotation();
|
||||
@ -257,6 +266,8 @@ public class LayoutParsingPipeline {
|
||||
PDRectangle cropbox = pdPage.getCropBox();
|
||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings());
|
||||
|
||||
classificationDocument.getVisualizations().addCleanRulingVisualization(cleanRulings, pageNumber);
|
||||
|
||||
List<Cell> emptyTableCells = TableExtractionService.findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
|
||||
var graphics = graphicExtractorService.extractPathElementGraphics(originDocument,
|
||||
@ -272,11 +283,16 @@ public class LayoutParsingPipeline {
|
||||
.map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), ImageType.GRAPHIC, false, stripper.getPageNumber()))
|
||||
.toList());
|
||||
|
||||
classificationDocument.getVisualizations().addCellVisualizations(emptyTableCells, pageNumber);
|
||||
|
||||
ClassificationPage classificationPage = switch (layoutParsingType) {
|
||||
case REDACT_MANAGER_OLD -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells);
|
||||
case REDACT_MANAGER_OLD ->
|
||||
redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells);
|
||||
case DOCUMINE -> docuMineBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, true);
|
||||
case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, false);
|
||||
case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG ->
|
||||
docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, true, classificationDocument.getVisualizations());
|
||||
case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG ->
|
||||
docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, false, classificationDocument.getVisualizations());
|
||||
};
|
||||
|
||||
classificationPage.setCleanRulings(cleanRulings);
|
||||
@ -286,8 +302,9 @@ public class LayoutParsingPipeline {
|
||||
classificationPage.setPageWidth(cropbox.getWidth());
|
||||
classificationPage.setPageHeight(cropbox.getHeight());
|
||||
|
||||
classificationDocument.getVisualizations().addMarkedContentVisualizations(stripper.getMarkedContents(), pageNumber, pdPage);
|
||||
// MarkedContent needs to be converted at this point, otherwise it leads to GC problems in PDFBox.
|
||||
classificationPage.setMarkedContentBboxPerType(convertMarkedContents(stripper.getMarkedContents()));
|
||||
classificationPage.setMarkedContentBboxPerType(convertMarkedContents(stripper.getMarkedContents(), pdPage));
|
||||
|
||||
// Whether an image is OCR needs to be calculated before textBlocks are moved into tables; otherwise the findOcr algorithm needs to be adapted.
|
||||
if (pdfImages != null && pdfImages.containsKey(pageNumber)) {
|
||||
@ -361,11 +378,11 @@ public class LayoutParsingPipeline {
|
||||
}
|
||||
|
||||
|
||||
private Map<String, List<Rectangle2D>> convertMarkedContents(List<PDMarkedContent> pdMarkedContents) {
|
||||
private Map<String, List<Rectangle2D>> convertMarkedContents(List<PDMarkedContent> pdMarkedContents, PDPage pdPage) {
|
||||
|
||||
Map<String, List<Rectangle2D>> markedContentBboxes = new HashMap<>();
|
||||
markedContentBboxes.put(MarkedContentUtils.HEADER, MarkedContentUtils.getMarkedContentBboxPerLine(pdMarkedContents, MarkedContentUtils.HEADER));
|
||||
markedContentBboxes.put(MarkedContentUtils.FOOTER, MarkedContentUtils.getMarkedContentBboxPerLine(pdMarkedContents, MarkedContentUtils.FOOTER));
|
||||
markedContentBboxes.put(MarkedContentUtils.HEADER, MarkedContentUtils.getMarkedContentBboxPerLine(pdMarkedContents, MarkedContentUtils.HEADER, pdPage));
|
||||
markedContentBboxes.put(MarkedContentUtils.FOOTER, MarkedContentUtils.getMarkedContentBboxPerLine(pdMarkedContents, MarkedContentUtils.FOOTER, pdPage));
|
||||
return markedContentBboxes;
|
||||
}
|
||||
|
||||
|
||||
@ -7,12 +7,14 @@ import java.util.stream.Collectors;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Character;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Line;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Zone;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.service.LineBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.service.NearestNeighbourService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.service.ReadingOrderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.service.SpacingService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.service.ZoneBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
|
||||
@ -43,16 +45,16 @@ public class DocstrumSegmentationService {
|
||||
|
||||
private List<Zone> computeZones(List<TextPositionSequence> textPositions, TextDirection direction) {
|
||||
|
||||
var positions = textPositions.stream().filter(t -> t.getDir() == direction).map(TextPositionSequence::getTextPositions).flatMap(List::stream).toList();
|
||||
List<RedTextPosition> positions = textPositions.stream().filter(t -> t.getDir() == direction).map(TextPositionSequence::getTextPositions).flatMap(List::stream).toList();
|
||||
|
||||
var characters = positions.stream().map(Character::new).collect(Collectors.toList());
|
||||
List<Character> characters = positions.stream().map(Character::new).collect(Collectors.toList());
|
||||
|
||||
nearestNeighbourService.findNearestNeighbors(characters);
|
||||
|
||||
var characterSpacing = spacingService.computeCharacterSpacing(characters);
|
||||
var lineSpacing = Math.min(spacingService.computeLineSpacing(characters), 20);
|
||||
double characterSpacing = spacingService.computeCharacterSpacing(characters);
|
||||
double lineSpacing = Math.min(spacingService.computeLineSpacing(characters), 20);
|
||||
|
||||
var lines = lineBuilderService.buildLines(characters, characterSpacing, lineSpacing);
|
||||
List<Line> lines = lineBuilderService.buildLines(characters, characterSpacing, lineSpacing);
|
||||
return zoneBuilderService.buildZones(lines, characterSpacing, lineSpacing);
|
||||
}
|
||||
|
||||
|
||||
@ -27,8 +27,8 @@ public class Character {
|
||||
|
||||
public Character(RedTextPosition chunk) {
|
||||
|
||||
this.x = chunk.getXDirAdj() + chunk.getWidthDirAdj() / 2;
|
||||
this.y = chunk.getYDirAdj() + chunk.getHeightDir() / 2;
|
||||
this.x = chunk.getDirectionAdjustedPosition().getCenterX();
|
||||
this.y = chunk.getDirectionAdjustedPosition().getCenterY();
|
||||
this.textPosition = chunk;
|
||||
}
|
||||
|
||||
@ -82,5 +82,4 @@ public class Character {
|
||||
return FastAtan2.fastAtan2(character.getY() - getY(), character.getX() - getX());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,11 +1,12 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.docstrum.model;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
@ -84,7 +85,9 @@ public class Line extends BoundingBox {
|
||||
|
||||
private double computeHeight() {
|
||||
|
||||
return characters.stream().map(Character::getHeight).reduce(0d, Double::sum) / characters.size();
|
||||
return characters.stream()
|
||||
.map(Character::getHeight)
|
||||
.reduce(0d, Double::sum) / characters.size();
|
||||
}
|
||||
|
||||
|
||||
@ -116,7 +119,7 @@ public class Line extends BoundingBox {
|
||||
|
||||
double ym = (y0 + y1) / 2;
|
||||
double yn = (other.y0 + other.y1) / 2;
|
||||
return Math.abs(ym - yn) / Math.sqrt(1);
|
||||
return Math.abs(ym - yn);
|
||||
}
|
||||
|
||||
|
||||
@ -141,21 +144,10 @@ public class Line extends BoundingBox {
|
||||
|
||||
private void buildBBox() {
|
||||
|
||||
double minX = Double.POSITIVE_INFINITY;
|
||||
double minY = Double.POSITIVE_INFINITY;
|
||||
double maxX = Double.NEGATIVE_INFINITY;
|
||||
double maxY = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Character character : characters) {
|
||||
|
||||
minX = Math.min(minX, character.getTextPosition().getXDirAdj());
|
||||
minY = Math.min(minY, character.getTextPosition().getYDirAdj());
|
||||
maxX = Math.max(maxX, character.getTextPosition().getXDirAdj() + character.getTextPosition().getWidthDirAdj());
|
||||
maxY = Math.max(maxY, character.getTextPosition().getYDirAdj() + character.getTextPosition().getHeightDir());
|
||||
|
||||
}
|
||||
|
||||
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
|
||||
this.setBBox(characters.stream()
|
||||
.map(Character::getTextPosition)
|
||||
.map(RedTextPosition::getInitialUserSpacePosition)
|
||||
.collect(RectangleTransformations.collectBBox()));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.docstrum.model;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@ -23,21 +24,9 @@ public class Zone extends BoundingBox {
|
||||
|
||||
public void buildBBox() {
|
||||
|
||||
double minX = Double.POSITIVE_INFINITY;
|
||||
double minY = Double.POSITIVE_INFINITY;
|
||||
double maxX = Double.NEGATIVE_INFINITY;
|
||||
double maxY = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Line line : lines) {
|
||||
|
||||
minX = Math.min(minX, line.getX());
|
||||
minY = Math.min(minY, line.getY());
|
||||
maxX = Math.max(maxX, line.getX() + line.getWidth());
|
||||
maxY = Math.max(maxY, line.getY() + line.getHeight());
|
||||
|
||||
}
|
||||
|
||||
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
|
||||
this.setBBox(getLines().stream()
|
||||
.map(BoundingBox::getBBox)
|
||||
.collect(RectangleTransformations.collectBBox()));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.docstrum.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
@ -30,24 +29,25 @@ public class LineBuilderService {
|
||||
AngleFilter filter = new AngleFilter(-ANGLE_TOLERANCE, ANGLE_TOLERANCE);
|
||||
|
||||
characters.forEach(character -> {
|
||||
character.getNeighbors().forEach(neighbor -> {
|
||||
double x = neighbor.getHorizontalDistance() / maxHorizontalDistance;
|
||||
double y = neighbor.getVerticalDistance() / maxVerticalDistance;
|
||||
if (character.getTextPosition().getDir() == neighbor.getCharacter().getTextPosition().getDir() && filter.matches(neighbor) && Math.pow(x, 2) + Math.pow(y,
|
||||
2) <= 1) {
|
||||
unionFind.union(character, neighbor.getCharacter());
|
||||
}
|
||||
});
|
||||
character.getNeighbors()
|
||||
.forEach(neighbor -> {
|
||||
double normalizedHorizontalDistance = neighbor.getHorizontalDistance() / maxHorizontalDistance;
|
||||
double normalizedVerticalDistance = neighbor.getVerticalDistance() / maxVerticalDistance;
|
||||
if (character.getTextPosition().getDir() == neighbor.getCharacter().getTextPosition().getDir() //
|
||||
&& filter.matches(neighbor) //
|
||||
&& Math.pow(normalizedHorizontalDistance, 2) + Math.pow(normalizedVerticalDistance, 2) <= 1) {
|
||||
unionFind.union(character, neighbor.getCharacter());
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
List<Line> lines = new ArrayList<>();
|
||||
unionFind.getGroups().forEach(group -> {
|
||||
List<Character> lineCharacters = new ArrayList<>(group);
|
||||
lineCharacters.sort(Comparator.comparingDouble(Character::getX));
|
||||
lines.add(new Line(lineCharacters, characterSpacing));
|
||||
});
|
||||
|
||||
return lines;
|
||||
return unionFind.getGroups()
|
||||
.stream()
|
||||
.map(lineCharacters -> lineCharacters.stream()
|
||||
.sorted(Comparator.comparingDouble(Character::getX))
|
||||
.toList())
|
||||
.map(lineCharacters -> new Line(lineCharacters, characterSpacing))
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -45,29 +45,35 @@ public class ZoneBuilderService {
|
||||
|
||||
double meanHeight = calculateMeanHeight(lines);
|
||||
|
||||
lines.forEach(outerLine -> //
|
||||
lines.forEach(innerLine -> {
|
||||
lines.forEach(outerLine -> {
|
||||
lines.forEach(innerLine -> {
|
||||
|
||||
double scale = Math.min(outerLine.getHeight(), innerLine.getHeight()) / meanHeight;
|
||||
scale = Math.max(MIN_LINE_SIZE_SCALE, Math.min(scale, MAX_LINE_SIZE_SCALE));
|
||||
if (innerLine == outerLine //
|
||||
|| unionFind.inSameSet(outerLine, innerLine)//
|
||||
|| outerLine.angularDifference(innerLine) > ANGLE_TOLERANCE) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!unionFind.inSameSet(outerLine, innerLine) && outerLine.angularDifference(innerLine) <= ANGLE_TOLERANCE) {
|
||||
double scale = Math.min(outerLine.getHeight(), innerLine.getHeight()) / meanHeight;
|
||||
scale = Math.max(MIN_LINE_SIZE_SCALE, Math.min(scale, MAX_LINE_SIZE_SCALE));
|
||||
|
||||
double horizontalDistance = outerLine.horizontalDistance(innerLine) / scale;
|
||||
double verticalDistance = outerLine.verticalDistance(innerLine) / scale;
|
||||
double horizontalDistance = outerLine.horizontalDistance(innerLine) / scale;
|
||||
double verticalDistance = outerLine.verticalDistance(innerLine) / scale;
|
||||
|
||||
if (minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance //
|
||||
|| minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance) {
|
||||
unionFind.union(outerLine, innerLine);
|
||||
}
|
||||
}
|
||||
}));
|
||||
if (minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance //
|
||||
|| minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance) {
|
||||
|
||||
List<Zone> zones = new ArrayList<>();
|
||||
unionFind.getGroups().forEach(group -> {
|
||||
zones.add(mergeLinesInZone(new ArrayList<>(group), characterSpacing, lineSpacing));
|
||||
unionFind.union(outerLine, innerLine);
|
||||
}
|
||||
|
||||
});
|
||||
});
|
||||
|
||||
List<Zone> zones = unionFind.getGroups()
|
||||
.stream()
|
||||
.map(group -> mergeLinesInZone(new ArrayList<>(group), characterSpacing, lineSpacing))
|
||||
.toList();
|
||||
|
||||
if (zones.size() > MAX_ZONES) {
|
||||
List<Line> oneZoneLines = new ArrayList<>();
|
||||
for (Zone zone : zones) {
|
||||
@ -103,35 +109,40 @@ public class ZoneBuilderService {
|
||||
UnionFind<Line> unionFind = new UnionFind<>(new HashSet<>(lines));
|
||||
|
||||
lines.forEach(outer -> {
|
||||
|
||||
lines.forEach(inner -> {
|
||||
if (inner != outer) {
|
||||
if (inner == outer) {
|
||||
return;
|
||||
}
|
||||
|
||||
double horizontalDistance = outer.horizontalDistance(inner);
|
||||
double verticalDistance = outer.verticalDistance(inner);
|
||||
double horizontalDistance = outer.horizontalDistance(inner);
|
||||
double verticalDistance = outer.verticalDistance(inner);
|
||||
|
||||
if (horizontalDistance <= maxHorizontalDistance && minVerticalDistance <= verticalDistance && verticalDistance <= maxVerticalDistance) {
|
||||
unionFind.union(outer, inner);
|
||||
} else if (minVerticalDistance <= verticalDistance && verticalDistance <= maxVerticalDistance && Math.abs(horizontalDistance - Math.min(outer.getLength(),
|
||||
inner.getLength())) < 0.1) {
|
||||
boolean characterOverlap = false;
|
||||
int overlappingCount = 0;
|
||||
for (Character outerCharacter : outer.getCharacters()) {
|
||||
for (Character innerCharacter : inner.getCharacters()) {
|
||||
double characterOverlapDistance = outerCharacter.overlappingDistance(innerCharacter);
|
||||
if (characterOverlapDistance > 2) {
|
||||
characterOverlap = true;
|
||||
}
|
||||
if (characterOverlapDistance > 0) {
|
||||
overlappingCount++;
|
||||
}
|
||||
if (horizontalDistance <= maxHorizontalDistance && minVerticalDistance <= verticalDistance && verticalDistance <= maxVerticalDistance) {
|
||||
|
||||
unionFind.union(outer, inner);
|
||||
|
||||
} else if (minVerticalDistance <= verticalDistance
|
||||
&& verticalDistance <= maxVerticalDistance
|
||||
&& Math.abs(horizontalDistance - Math.min(outer.getLength(), inner.getLength())) < 0.1) {
|
||||
|
||||
boolean characterOverlap = false;
|
||||
int overlappingCount = 0;
|
||||
for (Character outerCharacter : outer.getCharacters()) {
|
||||
for (Character innerCharacter : inner.getCharacters()) {
|
||||
double characterOverlapDistance = outerCharacter.overlappingDistance(innerCharacter);
|
||||
if (characterOverlapDistance > 2) {
|
||||
characterOverlap = true;
|
||||
}
|
||||
if (characterOverlapDistance > 0) {
|
||||
overlappingCount++;
|
||||
}
|
||||
}
|
||||
if (!characterOverlap && overlappingCount <= 2) {
|
||||
unionFind.union(outer, inner);
|
||||
}
|
||||
}
|
||||
if (!characterOverlap && overlappingCount <= 2) {
|
||||
unionFind.union(outer, inner);
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutparsingVisualizations;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -22,6 +23,7 @@ public class ClassificationDocument {
|
||||
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
|
||||
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
private LayoutparsingVisualizations visualizations = new LayoutparsingVisualizations();
|
||||
private boolean headlines;
|
||||
|
||||
private long rulesVersion;
|
||||
|
||||
@ -12,6 +12,7 @@ import java.util.stream.Stream;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutparsingVisualizations;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
@ -40,6 +41,8 @@ public class Document implements GenericSemanticNode {
|
||||
@Builder.Default
|
||||
Set<RedactionEntity> entities = new HashSet<>();
|
||||
|
||||
LayoutparsingVisualizations visualizations;
|
||||
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
|
||||
@ -4,12 +4,8 @@ import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Formatter;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.CohenSutherlandClipping;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.DoubleComparisons;
|
||||
@ -60,126 +56,13 @@ public class Ruling extends Line2D.Float {
|
||||
}
|
||||
|
||||
|
||||
// log(n) implementation of find_intersections
|
||||
// based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
|
||||
public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {
|
||||
|
||||
class SortObject {
|
||||
|
||||
protected SOType type;
|
||||
protected float position;
|
||||
protected Ruling ruling;
|
||||
|
||||
|
||||
public SortObject(SOType type, float position, Ruling ruling) {
|
||||
|
||||
this.type = type;
|
||||
this.position = position;
|
||||
this.ruling = ruling;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
List<SortObject> sos = new ArrayList<>();
|
||||
|
||||
TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
|
||||
@Override
|
||||
public int compare(Ruling o1, Ruling o2) {
|
||||
|
||||
return java.lang.Double.compare(o1.getTop(), o2.getTop());
|
||||
}
|
||||
});
|
||||
|
||||
TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
|
||||
@Override
|
||||
public int compare(Point2D o1, Point2D o2) {
|
||||
|
||||
if (o1.getY() > o2.getY()) {
|
||||
return 1;
|
||||
}
|
||||
if (o1.getY() < o2.getY()) {
|
||||
return -1;
|
||||
}
|
||||
if (o1.getX() > o2.getX()) {
|
||||
return 1;
|
||||
}
|
||||
if (o1.getX() < o2.getX()) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
});
|
||||
|
||||
for (Ruling h : horizontals) {
|
||||
sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_UNIT_EXPAND_AMOUNT, h));
|
||||
sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_UNIT_EXPAND_AMOUNT, h));
|
||||
}
|
||||
|
||||
for (Ruling v : verticals) {
|
||||
sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
|
||||
}
|
||||
|
||||
Collections.sort(sos, new Comparator<SortObject>() {
|
||||
@Override
|
||||
public int compare(SortObject a, SortObject b) {
|
||||
|
||||
int rv;
|
||||
if (DoubleComparisons.feq(a.position, b.position)) {
|
||||
if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
|
||||
rv = 1;
|
||||
} else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
|
||||
rv = -1;
|
||||
} else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
|
||||
rv = -1;
|
||||
} else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
|
||||
rv = 1;
|
||||
} else {
|
||||
rv = java.lang.Double.compare(a.position, b.position);
|
||||
}
|
||||
} else {
|
||||
return java.lang.Double.compare(a.position, b.position);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
});
|
||||
|
||||
for (SortObject so : sos) {
|
||||
switch (so.type) {
|
||||
case VERTICAL:
|
||||
for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
|
||||
try {
|
||||
Point2D i = h.getKey().intersectionPoint(so.ruling);
|
||||
if (i == null) {
|
||||
continue;
|
||||
}
|
||||
rv.put(i, new Ruling[]{h.getKey().expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT), so.ruling.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT)});
|
||||
} catch (UnsupportedOperationException e) {
|
||||
log.info("Some line are oblique, ignoring...");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case HRIGHT:
|
||||
tree.remove(so.ruling);
|
||||
break;
|
||||
case HLEFT:
|
||||
tree.put(so.ruling, true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean vertical() {
|
||||
public boolean isVertical() {
|
||||
|
||||
return this.length() > 0 && DoubleComparisons.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
public boolean horizontal() {
|
||||
public boolean isHorizontal() {
|
||||
|
||||
return this.length() > 0 && DoubleComparisons.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||
}
|
||||
@ -188,36 +71,36 @@ public class Ruling extends Line2D.Float {
|
||||
// these are used to have a single collapse method (in page, currently)
|
||||
|
||||
|
||||
public boolean oblique() {
|
||||
public boolean isOblique() {
|
||||
|
||||
return !(this.vertical() || this.horizontal());
|
||||
return !(this.isVertical() || this.isHorizontal());
|
||||
}
|
||||
|
||||
|
||||
public float getPosition() {
|
||||
|
||||
if (this.oblique()) {
|
||||
if (this.isOblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getLeft() : this.getTop();
|
||||
return this.isVertical() ? this.getLeft() : this.getTop();
|
||||
}
|
||||
|
||||
|
||||
public float getStart() {
|
||||
|
||||
if (this.oblique()) {
|
||||
if (this.isOblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getTop() : this.getLeft();
|
||||
return this.isVertical() ? this.getTop() : this.getLeft();
|
||||
}
|
||||
|
||||
|
||||
public void setStart(float v) {
|
||||
|
||||
if (this.oblique()) {
|
||||
if (this.isOblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
if (this.isVertical()) {
|
||||
this.setTop(v);
|
||||
} else {
|
||||
this.setLeft(v);
|
||||
@ -227,19 +110,19 @@ public class Ruling extends Line2D.Float {
|
||||
|
||||
public float getEnd() {
|
||||
|
||||
if (this.oblique()) {
|
||||
if (this.isOblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getBottom() : this.getRight();
|
||||
return this.isVertical() ? this.getBottom() : this.getRight();
|
||||
}
|
||||
|
||||
|
||||
public void setEnd(float v) {
|
||||
|
||||
if (this.oblique()) {
|
||||
if (this.isOblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
if (this.isVertical()) {
|
||||
this.setBottom(v);
|
||||
} else {
|
||||
this.setRight(v);
|
||||
@ -249,10 +132,10 @@ public class Ruling extends Line2D.Float {
|
||||
|
||||
public void setStartEnd(float start, float end) {
|
||||
|
||||
if (this.oblique()) {
|
||||
if (this.isOblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
if (this.isVertical()) {
|
||||
this.setTop(start);
|
||||
this.setBottom(end);
|
||||
} else {
|
||||
@ -264,7 +147,7 @@ public class Ruling extends Line2D.Float {
|
||||
|
||||
public boolean perpendicularTo(Ruling other) {
|
||||
|
||||
return this.vertical() == other.horizontal();
|
||||
return this.isVertical() == other.isHorizontal();
|
||||
}
|
||||
|
||||
|
||||
@ -318,30 +201,6 @@ public class Ruling extends Line2D.Float {
|
||||
}
|
||||
|
||||
|
||||
public Point2D intersectionPoint(Ruling other) {
|
||||
|
||||
Ruling this_l = this.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT);
|
||||
Ruling other_l = other.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT);
|
||||
Ruling horizontal, vertical;
|
||||
|
||||
if (!this_l.intersectsLine(other_l)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (this_l.horizontal() && other_l.vertical()) {
|
||||
horizontal = this_l;
|
||||
vertical = other_l;
|
||||
} else if (this_l.vertical() && other_l.horizontal()) {
|
||||
vertical = this_l;
|
||||
horizontal = other_l;
|
||||
} else {
|
||||
log.warn("lines must be orthogonal, vertical and horizontal");
|
||||
return null;
|
||||
}
|
||||
return new Point2D.Float(vertical.getLeft(), horizontal.getTop());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
@ -451,16 +310,9 @@ public class Ruling extends Line2D.Float {
|
||||
|
||||
final float TOLERANCE = 1;
|
||||
return Math.abs(ruling.getX1() - x1) < TOLERANCE &&//
|
||||
Math.abs(ruling.getY1() - y1) < TOLERANCE &&//
|
||||
Math.abs(ruling.getX2() - x2) < TOLERANCE &&//
|
||||
Math.abs(ruling.getY2() - y2) < TOLERANCE;
|
||||
}
|
||||
|
||||
|
||||
private enum SOType {
|
||||
VERTICAL,
|
||||
HRIGHT,
|
||||
HLEFT
|
||||
Math.abs(ruling.getY1() - y1) < TOLERANCE &&//
|
||||
Math.abs(ruling.getX2() - x2) < TOLERANCE &&//
|
||||
Math.abs(ruling.getY2() - y2) < TOLERANCE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.text;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
@ -16,7 +19,9 @@ import lombok.SneakyThrows;
|
||||
@AllArgsConstructor
|
||||
public class RedTextPosition {
|
||||
|
||||
private float[] position;
|
||||
private final static int HEIGHT_PADDING = 2;
|
||||
private Rectangle2D.Float directionAdjustedPosition;
|
||||
private Rectangle2D initialUserSpacePosition;
|
||||
|
||||
@JsonIgnore
|
||||
private int rotation;
|
||||
@ -58,43 +63,65 @@ public class RedTextPosition {
|
||||
pos.setFontSizeInPt(textPosition.getFontSizeInPt());
|
||||
pos.setFontName(textPosition.getFont().getName());
|
||||
|
||||
var position = new float[4];
|
||||
float textHeight = textPosition.getHeight() + HEIGHT_PADDING;
|
||||
Rectangle2D.Float dirAdjPosition = new Rectangle2D.Float(textPosition.getXDirAdj(),
|
||||
textPosition.getYDirAdj() - textHeight,
|
||||
textPosition.getWidthDirAdj(),
|
||||
textHeight + HEIGHT_PADDING);
|
||||
pos.setDirectionAdjustedPosition(dirAdjPosition);
|
||||
|
||||
position[0] = textPosition.getXDirAdj();
|
||||
position[1] = textPosition.getYDirAdj();
|
||||
position[2] = textPosition.getWidthDirAdj();
|
||||
position[3] = textPosition.getHeightDir();
|
||||
AffineTransform affineTransform = getRotationMatrix(TextDirection.fromDegrees(textPosition.getDir()), textPosition.getPageWidth(), textPosition.getPageHeight());
|
||||
Rectangle2D initialUserSpacePositionRect = affineTransform.createTransformedShape(dirAdjPosition).getBounds2D();
|
||||
|
||||
pos.setInitialUserSpacePosition(initialUserSpacePositionRect);
|
||||
|
||||
pos.setPosition(position);
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
private static AffineTransform getRotationMatrix(TextDirection textDirection, float pageWidth, float pageHeight) {
|
||||
|
||||
AffineTransform transform = new AffineTransform();
|
||||
|
||||
if (textDirection == TextDirection.ZERO || textDirection == TextDirection.HALF_CIRCLE) {
|
||||
transform.rotate(textDirection.getRadians(), pageWidth / 2f, pageHeight / 2f);
|
||||
transform.translate(0f, pageHeight);
|
||||
} else if (textDirection == TextDirection.QUARTER_CIRCLE) {
|
||||
transform.rotate(textDirection.getRadians(), pageWidth / 2f, pageWidth / 2f);
|
||||
transform.translate(0f, pageWidth);
|
||||
} else {
|
||||
transform.rotate(textDirection.getRadians(), pageHeight / 2f, pageHeight / 2f);
|
||||
transform.translate(0f, pageWidth);
|
||||
}
|
||||
transform.scale(1., -1.);
|
||||
return transform;
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public float getXDirAdj() {
|
||||
|
||||
return position[0];
|
||||
return this.directionAdjustedPosition.x;
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getYDirAdj() {
|
||||
|
||||
return position[1];
|
||||
return this.directionAdjustedPosition.y;
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getWidthDirAdj() {
|
||||
|
||||
return position[2];
|
||||
return this.directionAdjustedPosition.width;
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getHeightDir() {
|
||||
|
||||
return position[3];
|
||||
return this.directionAdjustedPosition.height;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model.text;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
@ -11,6 +12,7 @@ import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
@ -44,21 +46,19 @@ public class TextPositionSequence implements CharSequence {
|
||||
private boolean isParagraphStart;
|
||||
|
||||
|
||||
public TextPositionSequence(int page) {
|
||||
public TextPositionSequence(List<TextPosition> textPositions, int pageNumber, boolean isParagraphStart) {
|
||||
|
||||
this.page = page;
|
||||
}
|
||||
|
||||
|
||||
public TextPositionSequence(List<TextPosition> textPositions, int page, boolean isParagraphStart) {
|
||||
|
||||
this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList());
|
||||
this.page = page;
|
||||
this.textPositions = textPositions.stream()
|
||||
.map(RedTextPosition::fromTextPosition)
|
||||
.collect(Collectors.toList());
|
||||
this.page = pageNumber;
|
||||
this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
|
||||
this.rotation = textPositions.get(0).getRotation();
|
||||
this.pageHeight = textPositions.get(0).getPageHeight();
|
||||
this.pageWidth = textPositions.get(0).getPageWidth();
|
||||
this.isParagraphStart = isParagraphStart;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -314,10 +314,18 @@ public class TextPositionSequence implements CharSequence {
|
||||
topRight = transform.transform(topRight, null);
|
||||
|
||||
return new Rectangle( //
|
||||
new Point((float) bottomLeft.getX(), (float) bottomLeft.getY()),
|
||||
(float) (topRight.getX() - bottomLeft.getX()),
|
||||
(float) (topRight.getY() - bottomLeft.getY()),
|
||||
page);
|
||||
new Point((float) bottomLeft.getX(), (float) bottomLeft.getY()),
|
||||
(float) (topRight.getX() - bottomLeft.getX()),
|
||||
(float) (topRight.getY() - bottomLeft.getY()),
|
||||
page);
|
||||
}
|
||||
|
||||
|
||||
public Rectangle2D getBoundingBox() {
|
||||
|
||||
return getTextPositions().stream()
|
||||
.map(RedTextPosition::getInitialUserSpacePosition)
|
||||
.collect(RectangleTransformations.collectBBox());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -33,6 +33,7 @@ public class BodyTextFrameService {
|
||||
for (ClassificationPage page : classificationDocument.getPages()) {
|
||||
// var updatedBodyTextFrame = getBodyTextFrameFromRulings(page, bodyTextFrame, landscapeBodyTextFrame);
|
||||
setBodyTextFrameAdjustedToPage(page, bodyTextFrame, landscapeBodyTextFrame);
|
||||
classificationDocument.getVisualizations().addMainBodyVisualization(page.isLandscape() ? landscapeBodyTextFrame : bodyTextFrame, page.getPageNumber());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -122,7 +122,7 @@ public class RulingCleaningService {
|
||||
h = ruling.y1 - ruling.y2;
|
||||
}
|
||||
|
||||
if (ruling.horizontal()) {
|
||||
if (ruling.isHorizontal()) {
|
||||
return new Rectangle(top - THRESHOLD_Y_HORIZONTAL, left - THRESHOLD_X_HORIZONTAL, w + 2 * THRESHOLD_X_HORIZONTAL, h + 2 * THRESHOLD_Y_HORIZONTAL);
|
||||
} else {
|
||||
return new Rectangle(top - THRESHOLD_Y_VERTICAL, left - THRESHOLD_X_VERTICAL, w + 2 * THRESHOLD_X_VERTICAL, h + 2 * THRESHOLD_Y_VERTICAL);
|
||||
@ -160,14 +160,14 @@ public class RulingCleaningService {
|
||||
|
||||
List<Ruling> vrs = new ArrayList<>();
|
||||
for (Ruling vr : rulings) {
|
||||
if (vr.vertical()) {
|
||||
if (vr.isVertical()) {
|
||||
vrs.add(vr);
|
||||
}
|
||||
}
|
||||
|
||||
List<Ruling> hrs = new ArrayList<>();
|
||||
for (Ruling hr : rulings) {
|
||||
if (hr.horizontal()) {
|
||||
if (hr.isHorizontal()) {
|
||||
hrs.add(hr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -15,6 +15,7 @@ import com.knecon.fforesight.service.layoutparser.processor.docstrum.utils.Doubl
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutparsingVisualizations;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
@ -37,11 +38,18 @@ public class DocstrumBlockificationService {
|
||||
static final float THRESHOLD = 1f;
|
||||
|
||||
|
||||
public ClassificationPage blockify(List<TextPositionSequence> textPositions, List<Cell> cells, boolean xyOrder) {
|
||||
public ClassificationPage blockify(List<TextPositionSequence> textPositions, List<Cell> cells, boolean xyOrder, LayoutparsingVisualizations visualizations) {
|
||||
|
||||
CleanRulings usedRulings = RectangleTransformations.extractRulings(cells);
|
||||
|
||||
var zones = docstrumSegmentationService.segmentPage(textPositions, xyOrder);
|
||||
|
||||
if (!textPositions.isEmpty()) {
|
||||
visualizations.addZoneVisualizations(zones, textPositions.get(0).getPage());
|
||||
visualizations.addLineVisualizations(zones, textPositions.get(0).getPage());
|
||||
visualizations.addCharactersWithNeighbours(zones, textPositions.get(0).getPage());
|
||||
}
|
||||
|
||||
var pageBlocks = toAbstractPageBlocks(zones, usedRulings.getHorizontal(), usedRulings.getVertical(), xyOrder);
|
||||
|
||||
var classificationPage = new ClassificationPage(pageBlocks);
|
||||
@ -58,18 +66,20 @@ public class DocstrumBlockificationService {
|
||||
zones.forEach(zone -> {
|
||||
|
||||
List<TextPositionSequence> textPositionSequences = new ArrayList<>();
|
||||
zone.getLines().forEach(line -> {
|
||||
line.getWords().forEach(word -> {
|
||||
textPositionSequences.add(new TextPositionSequence(word.getTextPositions(), word.getPage()));
|
||||
});
|
||||
});
|
||||
zone.getLines()
|
||||
.forEach(line -> {
|
||||
line.getWords()
|
||||
.forEach(word -> {
|
||||
textPositionSequences.add(new TextPositionSequence(word.getTextPositions(), word.getPage()));
|
||||
});
|
||||
});
|
||||
|
||||
abstractPageBlocks.addAll(splitZonesAtRulings(textPositionSequences, horizontalRulings, verticalRulings));
|
||||
});
|
||||
|
||||
if (xyOrder) {
|
||||
abstractPageBlocks.sort(Comparator.comparing(AbstractPageBlock::getMinY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
|
||||
.thenComparing(AbstractPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
|
||||
.thenComparing(AbstractPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
|
||||
abstractPageBlocks.sort(new Comparator<AbstractPageBlock>() {
|
||||
@Override
|
||||
public int compare(AbstractPageBlock o1, AbstractPageBlock o2) {
|
||||
@ -134,8 +144,8 @@ public class DocstrumBlockificationService {
|
||||
private boolean isOnlyIntersectingYAndOnelineOrPrevoiusTwoLines(TextPageBlock previous, TextPageBlock current, ClassificationPage page) {
|
||||
|
||||
return current.intersectsY(previous) //
|
||||
&& (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1) //
|
||||
&& numberOfYIntersectionsOfSmallerBlocksWithOtherBlocks(current, previous, page.getTextBlocks()) <= 0;
|
||||
&& (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1) //
|
||||
&& numberOfYIntersectionsOfSmallerBlocksWithOtherBlocks(current, previous, page.getTextBlocks()) <= 0;
|
||||
}
|
||||
|
||||
|
||||
@ -144,16 +154,16 @@ public class DocstrumBlockificationService {
|
||||
ClassificationPage page) {
|
||||
|
||||
return (Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) //
|
||||
&& (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() >= 2 && current.getNumberOfLines() == 1) //
|
||||
&& !hasBetween(current, previous, page.getTextBlocks()) && numberOfYIntersectionsOfSmallerBlocksWithOtherBlocks(current, previous, page.getTextBlocks()) <= 4;
|
||||
&& (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() >= 2 && current.getNumberOfLines() == 1) //
|
||||
&& !hasBetween(current, previous, page.getTextBlocks()) && numberOfYIntersectionsOfSmallerBlocksWithOtherBlocks(current, previous, page.getTextBlocks()) <= 4;
|
||||
}
|
||||
|
||||
|
||||
private boolean areTheOnlyTwoBlocksOnHeightsWithBothMoreThanTwoLines(TextPageBlock previous, TextPageBlock current, ClassificationPage page) {
|
||||
|
||||
return previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 //
|
||||
&& previous.intersectsY(current) //
|
||||
&& numberOfYIntersectionsOfSmallerBlocksWithOtherBlocks(current, previous, page.getTextBlocks()) == 0;
|
||||
&& previous.intersectsY(current) //
|
||||
&& numberOfYIntersectionsOfSmallerBlocksWithOtherBlocks(current, previous, page.getTextBlocks()) == 0;
|
||||
}
|
||||
|
||||
|
||||
@ -213,7 +223,7 @@ public class DocstrumBlockificationService {
|
||||
ListIterator<AbstractPageBlock> itty = blocks.listIterator();
|
||||
while (itty.hasNext()) {
|
||||
AbstractPageBlock block = itty.next();
|
||||
if(block == null){
|
||||
if (block == null) {
|
||||
continue;
|
||||
}
|
||||
if (block instanceof TablePageBlock) {
|
||||
@ -224,7 +234,7 @@ public class DocstrumBlockificationService {
|
||||
|
||||
for (int i = 0; i < blocks.size(); i++) {
|
||||
|
||||
if(blocks.get(i) == null){
|
||||
if (blocks.get(i) == null) {
|
||||
continue;
|
||||
}
|
||||
if (blocks.get(i) == current) {
|
||||
@ -249,8 +259,8 @@ public class DocstrumBlockificationService {
|
||||
}
|
||||
}
|
||||
var blocksIterator = blocks.iterator();
|
||||
while(blocksIterator.hasNext()){
|
||||
if(blocksIterator.next() == null){
|
||||
while (blocksIterator.hasNext()) {
|
||||
if (blocksIterator.next() == null) {
|
||||
blocksIterator.remove();
|
||||
}
|
||||
}
|
||||
@ -338,11 +348,11 @@ public class DocstrumBlockificationService {
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextPageBlock(wordBlock.getMinXDirAdj(),
|
||||
wordBlock.getMaxXDirAdj(),
|
||||
wordBlock.getMinYDirAdj(),
|
||||
wordBlock.getMaxYDirAdj(),
|
||||
wordBlockList,
|
||||
wordBlock.getRotation());
|
||||
wordBlock.getMaxXDirAdj(),
|
||||
wordBlock.getMinYDirAdj(),
|
||||
wordBlock.getMaxYDirAdj(),
|
||||
wordBlockList,
|
||||
wordBlock.getRotation());
|
||||
} else {
|
||||
TextPageBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
@ -358,7 +368,12 @@ public class DocstrumBlockificationService {
|
||||
textBlock.setHighestFontSize(fontSizeFrequencyCounter.getHighest());
|
||||
}
|
||||
|
||||
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences().stream().map(t -> round(t.getMinYDirAdj(), 3)).collect(toSet()).size() == 1) {
|
||||
if (textBlock != null
|
||||
&& textBlock.getSequences() != null
|
||||
&& textBlock.getSequences()
|
||||
.stream()
|
||||
.map(t -> round(t.getMinYDirAdj(), 3))
|
||||
.collect(toSet()).size() == 1) {
|
||||
textBlock.getSequences().sort(Comparator.comparing(TextPositionSequence::getMinXDirAdj));
|
||||
}
|
||||
return textBlock;
|
||||
@ -373,38 +388,34 @@ public class DocstrumBlockificationService {
|
||||
List<Ruling> horizontalRulingLines,
|
||||
List<Ruling> verticalRulingLines) {
|
||||
|
||||
return isSplitByRuling(maxX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMinYDirAdj(),
|
||||
verticalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) //
|
||||
|| isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) //
|
||||
|| isSplitByRuling(maxX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMinYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) //
|
||||
|| isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
verticalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight());
|
||||
return isSplitByRuling(maxX, minY, word.getMinXDirAdj(), word.getMinYDirAdj(), verticalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight())
|
||||
//
|
||||
|| isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight())
|
||||
//
|
||||
|| isSplitByRuling(maxX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMinYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight())
|
||||
//
|
||||
|| isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
verticalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@ public class DocuMineBlockificationService {
|
||||
* This method must use text direction adjusted postions (DirAdj). Where {0,0} is on the upper left. Never try to change this!
|
||||
* Rulings (Table lines) must be adjusted to the text directions as well, when checking if a block is split by a ruling.
|
||||
*
|
||||
* @param textPositions The words of a page.
|
||||
* @param textPositions The textPositions of a page.
|
||||
* @param horizontalRulingLines Horizontal table lines.
|
||||
* @param verticalRulingLines Vertical table lines.
|
||||
* @return Page object that contains the Textblock and text statistics.
|
||||
|
||||
@ -7,7 +7,6 @@ import static java.util.stream.Collectors.toList;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -15,7 +14,6 @@ import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
@ -52,6 +50,9 @@ public class DocumentGraphFactory {
|
||||
public Document buildDocumentGraph(LayoutParsingType layoutParsingType, ClassificationDocument document) {
|
||||
|
||||
Document documentGraph = new Document();
|
||||
|
||||
documentGraph.setVisualizations(document.getVisualizations());
|
||||
|
||||
Context context = new Context(documentGraph);
|
||||
|
||||
document.getPages()
|
||||
@ -85,14 +86,11 @@ public class DocumentGraphFactory {
|
||||
|
||||
GenericSemanticNode node;
|
||||
if (originalTextBlock.isHeadline()) {
|
||||
node = Headline.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
node = Headline.builder().documentTree(context.getDocumentTree()).build();
|
||||
} else if (originalTextBlock.isToDuplicate()) {
|
||||
node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree()).build();
|
||||
} else {
|
||||
node = Paragraph.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
node = Paragraph.builder().documentTree(context.getDocumentTree()).build();
|
||||
}
|
||||
|
||||
page.getMainBody().add(node);
|
||||
@ -178,8 +176,7 @@ public class DocumentGraphFactory {
|
||||
private void addFooter(List<TextPageBlock> textBlocks, Context context) {
|
||||
|
||||
Page page = context.getPage(textBlocks.get(0).getPage());
|
||||
Footer footer = Footer.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeTextPositionSequence(textBlocks),
|
||||
footer,
|
||||
context,
|
||||
@ -194,8 +191,7 @@ public class DocumentGraphFactory {
|
||||
public void addHeader(List<TextPageBlock> textBlocks, Context context) {
|
||||
|
||||
Page page = context.getPage(textBlocks.get(0).getPage());
|
||||
Header header = Header.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
Header header = Header.builder().documentTree(context.getDocumentTree()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeTextPositionSequence(textBlocks), header, 0, page);
|
||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
|
||||
header.setTreeId(tocId);
|
||||
@ -207,8 +203,7 @@ public class DocumentGraphFactory {
|
||||
private void addEmptyFooter(int pageIndex, Context context) {
|
||||
|
||||
Page page = context.getPage(pageIndex);
|
||||
Footer footer = Footer.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
|
||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
|
||||
footer.setTreeId(tocId);
|
||||
@ -220,8 +215,7 @@ public class DocumentGraphFactory {
|
||||
private void addEmptyHeader(int pageIndex, Context context) {
|
||||
|
||||
Page page = context.getPage(pageIndex);
|
||||
Header header = Header.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
Header header = Header.builder().documentTree(context.getDocumentTree()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
|
||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
|
||||
header.setTreeId(tocId);
|
||||
|
||||
@ -29,19 +29,22 @@ public class SearchTextWithTextPositionFactory {
|
||||
|
||||
public SearchTextWithTextPositionDto buildSearchTextToTextPositionDto(List<TextPositionSequence> sequences) {
|
||||
|
||||
if (sequences.isEmpty() || sequences.stream().allMatch(sequence -> sequence.getTextPositions().isEmpty())) {
|
||||
if (sequences.isEmpty() || sequences.stream()
|
||||
.allMatch(sequence -> sequence.getTextPositions().isEmpty())) {
|
||||
return SearchTextWithTextPositionDto.empty();
|
||||
}
|
||||
|
||||
Context context = new Context();
|
||||
|
||||
RedTextPosition currentTextPosition = sequences.get(0).getTextPositions().get(0);
|
||||
RedTextPosition previousTextPosition = RedTextPosition.builder().unicode(" ").position(currentTextPosition.getPosition()).build();
|
||||
RedTextPosition currentTextPosition = sequences.get(0).getTextPositions()
|
||||
.get(0);
|
||||
RedTextPosition previousTextPosition = RedTextPosition.builder().unicode(" ").directionAdjustedPosition(currentTextPosition.getDirectionAdjustedPosition()).build();
|
||||
|
||||
for (TextPositionSequence word : sequences) {
|
||||
for (int i = 0; i < word.getTextPositions().size(); ++i) {
|
||||
|
||||
currentTextPosition = word.getTextPositions().get(i);
|
||||
currentTextPosition = word.getTextPositions()
|
||||
.get(i);
|
||||
if (isLineBreak(currentTextPosition, previousTextPosition)) {
|
||||
removeHyphenLinebreaks(context);
|
||||
context.lineBreaksStringIdx.add(context.stringIdx);
|
||||
@ -57,7 +60,7 @@ public class SearchTextWithTextPositionFactory {
|
||||
++context.positionIdx;
|
||||
}
|
||||
|
||||
previousTextPosition = RedTextPosition.builder().unicode(" ").position(previousTextPosition.getPosition()).build();
|
||||
previousTextPosition = RedTextPosition.builder().unicode(" ").directionAdjustedPosition(previousTextPosition.getDirectionAdjustedPosition()).build();
|
||||
context.stringBuilder.append(" ");
|
||||
context.stringIdxToPositionIdx.add(context.positionIdx);
|
||||
++context.stringIdx;
|
||||
@ -66,7 +69,7 @@ public class SearchTextWithTextPositionFactory {
|
||||
assert context.stringBuilder.length() == context.stringIdxToPositionIdx.size();
|
||||
|
||||
List<Rectangle2D> positions = sequences.stream()
|
||||
.flatMap(sequence -> sequence.getTextPositions().stream().map(textPosition -> mapRedTextPositionToInitialUserSpace(textPosition, sequence)))
|
||||
.map(TextPositionSequence::getBoundingBox)
|
||||
.toList();
|
||||
|
||||
return SearchTextWithTextPositionDto.builder()
|
||||
@ -153,7 +156,7 @@ public class SearchTextWithTextPositionFactory {
|
||||
return false;
|
||||
}
|
||||
|
||||
float deltaY = Math.abs(currentPosition.getYDirAdj() - previousPosition.getYDirAdj());
|
||||
double deltaY = Math.abs(currentPosition.getYDirAdj() - previousPosition.getYDirAdj());
|
||||
return deltaY >= currentPosition.getHeightDir();
|
||||
}
|
||||
|
||||
@ -167,16 +170,16 @@ public class SearchTextWithTextPositionFactory {
|
||||
private boolean isHyphen(String unicodeCharacter) {
|
||||
|
||||
return Objects.equals(unicodeCharacter, "-") || //
|
||||
Objects.equals(unicodeCharacter, "~") || //
|
||||
Objects.equals(unicodeCharacter, "‐") || //
|
||||
Objects.equals(unicodeCharacter, "‒") || //
|
||||
Objects.equals(unicodeCharacter, "⁻") || //
|
||||
Objects.equals(unicodeCharacter, "−") || //
|
||||
Objects.equals(unicodeCharacter, "﹣") || //
|
||||
Objects.equals(unicodeCharacter, "゠") || //
|
||||
Objects.equals(unicodeCharacter, "⁓") || //
|
||||
Objects.equals(unicodeCharacter, "‑") || //
|
||||
Objects.equals(unicodeCharacter, "\u00AD");
|
||||
Objects.equals(unicodeCharacter, "~") || //
|
||||
Objects.equals(unicodeCharacter, "‐") || //
|
||||
Objects.equals(unicodeCharacter, "‒") || //
|
||||
Objects.equals(unicodeCharacter, "⁻") || //
|
||||
Objects.equals(unicodeCharacter, "−") || //
|
||||
Objects.equals(unicodeCharacter, "﹣") || //
|
||||
Objects.equals(unicodeCharacter, "゠") || //
|
||||
Objects.equals(unicodeCharacter, "⁓") || //
|
||||
Objects.equals(unicodeCharacter, "‑") || //
|
||||
Objects.equals(unicodeCharacter, "\u00AD");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -82,7 +82,6 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
|
||||
private int pageRotation;
|
||||
private PDRectangle pageSize;
|
||||
private Matrix translateMatrix;
|
||||
private final GlyphList glyphList;
|
||||
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
|
||||
|
||||
@ -134,12 +133,6 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
this.pageRotation = page.getRotation();
|
||||
this.pageSize = page.getCropBox();
|
||||
|
||||
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) {
|
||||
translateMatrix = null;
|
||||
} else {
|
||||
// translation matrix for cropbox
|
||||
translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
|
||||
}
|
||||
super.processPage(page);
|
||||
}
|
||||
|
||||
@ -265,62 +258,52 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
}
|
||||
}
|
||||
|
||||
// adjust for cropbox if needed
|
||||
Matrix translatedTextRenderingMatrix;
|
||||
if (translateMatrix == null) {
|
||||
translatedTextRenderingMatrix = textRenderingMatrix;
|
||||
} else {
|
||||
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
|
||||
nextX -= pageSize.getLowerLeftX();
|
||||
nextY -= pageSize.getLowerLeftY();
|
||||
}
|
||||
|
||||
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
|
||||
if (unicodeMapping.length() == 2) {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(0)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(0)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(1)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(1)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
} else {
|
||||
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
unicodeMapping,
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
textRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
unicodeMapping,
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1007,7 +1007,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine {
|
||||
|
||||
/**
|
||||
* Set the desired word separator for output text. The PDFBox text extraction algorithm will output a space
|
||||
* character if there is enough space between two words. By default a space character is used. If you need and
|
||||
* character if there is enough space between two textPositions. By default a space character is used. If you need an
|
||||
* accurate count of characters that are found in a PDF document then you might want to set the word separator to
|
||||
* the empty string.
|
||||
*
|
||||
@ -1703,7 +1703,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine {
|
||||
/**
|
||||
* Write a list of string containing a whole line of a document.
|
||||
*
|
||||
* @param line a list with the words of the given line
|
||||
* @param line a list with the textPositions of the given line
|
||||
* @throws IOException if something went wrong
|
||||
*/
|
||||
private void writeLine(List<WordWithTextPositions> line, boolean isParagraphEnd) throws IOException {
|
||||
@ -1744,9 +1744,9 @@ public class PDFTextStripper extends LegacyPDFStreamEngine {
|
||||
|
||||
|
||||
/**
|
||||
* Handles the LTR and RTL direction of the given words. The whole implementation stands and falls with the given
|
||||
* word. If the word is a full line, the results will be the best. If the word contains of single words or
|
||||
* characters, the order of the characters in a word or words in a line may wrong, due to RTL and LTR marks and
|
||||
* Handles the LTR and RTL direction of the given textPositions. The whole implementation stands and falls with the given
|
||||
* word. If the word consists of single textPositions or
|
||||
* characters, the order of the characters in a word or textPositions in a line may be wrong, due to RTL and LTR marks and
|
||||
* characters!
|
||||
* <p>
|
||||
* Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx
|
||||
|
||||
@ -70,7 +70,9 @@ public class LayoutGridService {
|
||||
Visualizations layoutGrid = this.addLayoutGrid(document, layerVisibilityDefaultValue, false);
|
||||
Visualizations visualLayoutGrid = this.addLayoutGrid(document, layerVisibilityDefaultValue, true);
|
||||
|
||||
viewerDocumentService.addVisualizationsOnPage(originFile, destinationFile, List.of(layoutGrid, visualLayoutGrid));
|
||||
List<Visualizations> allVisualizations = Stream.concat(Stream.of(layoutGrid, visualLayoutGrid), document.getVisualizations().streamAll()).toList();
|
||||
|
||||
viewerDocumentService.addVisualizationsOnPage(originFile, destinationFile, allVisualizations);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,12 +1,5 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
@ -14,13 +7,24 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class MarkedContentUtils {
|
||||
|
||||
public static final String HEADER = "Header";
|
||||
public static final String FOOTER = "Footer";
|
||||
|
||||
public List<Rectangle2D> getMarkedContentBboxPerLine(List<PDMarkedContent> markedContents, String subtype) {
|
||||
|
||||
public List<Rectangle2D> getMarkedContentBboxPerLine(List<PDMarkedContent> markedContents, String subtype, PDPage pdPage) {
|
||||
|
||||
if (markedContents == null) {
|
||||
return Collections.emptyList();
|
||||
@ -31,7 +35,8 @@ public class MarkedContentUtils {
|
||||
.filter(m -> m.getProperties() != null)
|
||||
.filter(m -> m.getProperties().getItem("Subtype") != null)
|
||||
.filter(m -> ((COSName) m.getProperties().getItem("Subtype")).getName().equals(subtype))
|
||||
.map(PDMarkedContent::getContents).flatMap(Collection::stream)
|
||||
.map(PDMarkedContent::getContents)
|
||||
.flatMap(Collection::stream)
|
||||
.filter(t -> t instanceof TextPosition)
|
||||
.map(t -> (TextPosition) t)
|
||||
.filter(t -> !t.getUnicode().equals(" "))
|
||||
@ -41,16 +46,77 @@ public class MarkedContentUtils {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
return markedContentByYPosition.values().stream()
|
||||
.map(textPositions -> new TextPositionSequence(textPositions.stream()
|
||||
.toList(), 0, true)
|
||||
.getRectangle())
|
||||
.map(t -> new Rectangle2D.Float(t.getTopLeft().getX(), t.getTopLeft().getY() - Math.abs(t.getHeight()), t.getWidth(), Math.abs(t.getHeight()))).collect(Collectors.toList());
|
||||
return markedContentByYPosition.values()
|
||||
.stream()
|
||||
.map(textPositions -> new TextPositionSequence(textPositions, 0, true).getRectangle())
|
||||
.map(t -> new Rectangle2D.Float(t.getTopLeft().getX(), t.getTopLeft().getY() - Math.abs(t.getHeight()), t.getWidth(), Math.abs(t.getHeight())))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public List<MarkedContentPosition> getMarkedContentPositions(List<PDMarkedContent> markedContents, PDPage pdPage) {
|
||||
|
||||
if (markedContents == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
return markedContents.stream()
|
||||
.filter(m -> !m.getContents().isEmpty())
|
||||
.map(markedContent -> MarkedContentPosition.fromPDMarkedContent(markedContent, pdPage))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
public boolean intersects(TextPageBlock textBlock, Map<String, List<Rectangle2D>> markedContentBboxPerType, String type) {
|
||||
return markedContentBboxPerType.get(type) != null && markedContentBboxPerType.get(type).stream().anyMatch(rectangle -> rectangle.intersects(textBlock.getPdfMinX(), textBlock.getPdfMinY(), textBlock.getWidth(), textBlock.getHeight()));
|
||||
|
||||
return markedContentBboxPerType.get(type) != null && markedContentBboxPerType.get(type)
|
||||
.stream()
|
||||
.anyMatch(rectangle -> rectangle.intersects(textBlock.getPdfMinX(), textBlock.getPdfMinY(), textBlock.getWidth(), textBlock.getHeight()));
|
||||
}
|
||||
|
||||
|
||||
public record MarkedContentPosition(String type, String subType, List<Rectangle2D> textPositions) {
|
||||
|
||||
public static MarkedContentPosition fromPDMarkedContent(PDMarkedContent markedContent, PDPage pdPage) {
|
||||
|
||||
return new MarkedContentPosition(markedContent.getTag(), parseSubType(markedContent), parseTextPositions(markedContent.getContents(), pdPage));
|
||||
}
|
||||
|
||||
|
||||
private static List<Rectangle2D> parseTextPositions(List<Object> contents, PDPage pdPage) {
|
||||
|
||||
return contents.stream()
|
||||
.filter(content -> content instanceof TextPosition)
|
||||
.map(content -> (TextPosition) content)
|
||||
.filter(content -> !content.getUnicode().equals(" "))
|
||||
.map(textPositions -> new TextPositionSequence(List.of(textPositions), 0, true).getRectangle())
|
||||
.map(t -> new Rectangle2D.Float(t.getTopLeft().getX(), t.getTopLeft().getY() - Math.abs(t.getHeight()), t.getWidth(), Math.abs(t.getHeight())))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private static String parseSubType(PDMarkedContent markedContent) {
|
||||
|
||||
if (markedContent == null || markedContent.getProperties() == null || markedContent.getProperties().getItem("Subtype") == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return ((COSName) markedContent.getProperties().getItem("Subtype")).getName();
|
||||
}
|
||||
|
||||
|
||||
public String formattedType() {
|
||||
|
||||
if (subType == null || subType.isEmpty()) {
|
||||
return type;
|
||||
}
|
||||
if (type.equals("Artifact")) {
|
||||
return subType;
|
||||
}
|
||||
return String.format("%s-%s", type, subType);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -52,7 +52,10 @@ public class RectangleTransformations {
|
||||
|
||||
public static Rectangle2D bBoxUnionAtomicTextBlock(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
|
||||
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
|
||||
return atomicTextBlocks.stream()
|
||||
.flatMap(atomicTextBlock -> atomicTextBlock.getPositions()
|
||||
.stream())
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -77,7 +80,10 @@ public class RectangleTransformations {
|
||||
|
||||
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
|
||||
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
|
||||
return atomicTextBlocks.stream()
|
||||
.flatMap(atomicTextBlock -> atomicTextBlock.getPositions()
|
||||
.stream())
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -89,16 +95,18 @@ public class RectangleTransformations {
|
||||
|
||||
public static Rectangle2D rectangleBBox(List<Rectangle> rectangles) {
|
||||
|
||||
return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DBBoxCollector());
|
||||
return rectangles.stream()
|
||||
.map(RectangleTransformations::toRectangle2D)
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D toRectangle2D(Rectangle redactionLogRectangle) {
|
||||
|
||||
return new Rectangle2D.Double(redactionLogRectangle.getTopLeft().getX(),
|
||||
redactionLogRectangle.getTopLeft().getY() + redactionLogRectangle.getHeight(),
|
||||
redactionLogRectangle.getWidth(),
|
||||
-redactionLogRectangle.getHeight());
|
||||
redactionLogRectangle.getTopLeft().getY() + redactionLogRectangle.getHeight(),
|
||||
redactionLogRectangle.getWidth(),
|
||||
-redactionLogRectangle.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@ -111,15 +119,16 @@ public class RectangleTransformations {
|
||||
public static Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) {
|
||||
|
||||
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
(float) rectangle2D.getWidth(),
|
||||
-(float) rectangle2D.getHeight(),
|
||||
pageNumber);
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
|
||||
|
||||
return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector());
|
||||
return rectangle2DList.stream()
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -134,7 +143,9 @@ public class RectangleTransformations {
|
||||
if (rectangle2DList.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
double splitThreshold = rectangle2DList.stream().mapToDouble(RectangularShape::getWidth).average().orElse(5) * 5.0;
|
||||
double splitThreshold = rectangle2DList.stream()
|
||||
.mapToDouble(RectangularShape::getWidth).average()
|
||||
.orElse(5) * 5.0;
|
||||
|
||||
List<List<Rectangle2D>> rectangleListsWithGaps = new LinkedList<>();
|
||||
List<Rectangle2D> rectangleListWithoutGaps = new LinkedList<>();
|
||||
@ -195,9 +206,9 @@ public class RectangleTransformations {
|
||||
public BinaryOperator<BBox> combiner() {
|
||||
|
||||
return (b1, b2) -> new BBox(Math.min(b1.lowerLeftX, b2.lowerLeftX),
|
||||
Math.min(b1.lowerLeftY, b2.lowerLeftY),
|
||||
Math.max(b1.upperRightX, b2.upperRightX),
|
||||
Math.max(b1.upperRightY, b2.upperRightY));
|
||||
Math.min(b1.lowerLeftY, b2.lowerLeftY),
|
||||
Math.max(b1.upperRightX, b2.upperRightX),
|
||||
Math.max(b1.upperRightY, b2.upperRightY));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -14,23 +14,24 @@ public class RectangularIntersectionFinder {
|
||||
|
||||
public static List<Rectangle2D> find(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
// Fix for 211.pdf
|
||||
for (Ruling r : horizontalRulingLines) {
|
||||
if (r.getX2() < r.getX1()) {
|
||||
double a = r.getX2();
|
||||
r.x2 = (float) r.getX1();
|
||||
r.x1 = (float) a;
|
||||
}
|
||||
}
|
||||
// // Fix for 211.pdf
|
||||
// for (Ruling r : horizontalRulingLines) {
|
||||
// if (r.getX2() < r.getX1()) {
|
||||
// double a = r.getX2();
|
||||
// r.x2 = (float) r.getX1();
|
||||
// r.x1 = (float) a;
|
||||
// }
|
||||
// }
|
||||
|
||||
List<Rectangle2D> foundRectangles = new ArrayList<>();
|
||||
Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines);
|
||||
Map<Point2D, RulingIntersectionFinder.IntersectingRulings> intersectionPoints = RulingIntersectionFinder.findNaive(horizontalRulingLines, verticalRulingLines);
|
||||
|
||||
List<Point2D> intersectionPointsList = new ArrayList<>(intersectionPoints.keySet());
|
||||
intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR);
|
||||
|
||||
for (int i = 0; i < intersectionPointsList.size(); i++) {
|
||||
Point2D topLeft = intersectionPointsList.get(i);
|
||||
Ruling[] hv = intersectionPoints.get(topLeft);
|
||||
RulingIntersectionFinder.IntersectingRulings intersectingRulingsFromTopLeft = intersectionPoints.get(topLeft);
|
||||
|
||||
// CrossingPointsDirectlyBelow( topLeft );
|
||||
List<Point2D> xPoints = new ArrayList<>();
|
||||
@ -48,18 +49,19 @@ public class RectangularIntersectionFinder {
|
||||
outer:
|
||||
for (Point2D xPoint : xPoints) {
|
||||
// is there a vertical edge b/w topLeft and xPoint?
|
||||
if (!hv[1].equals(intersectionPoints.get(xPoint)[1])) {
|
||||
if (!intersectingRulingsFromTopLeft.vertical().equals(intersectionPoints.get(xPoint).vertical())) {
|
||||
continue;
|
||||
}
|
||||
for (Point2D yPoint : yPoints) {
|
||||
// is there a horizontal edge b/w topLeft and yPoint ?
|
||||
if (!hv[0].equals(intersectionPoints.get(yPoint)[0])) {
|
||||
if (!intersectingRulingsFromTopLeft.horizontal().equals(intersectionPoints.get(yPoint).horizontal())) {
|
||||
continue;
|
||||
}
|
||||
Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY());
|
||||
if (intersectionPoints.containsKey(btmRight)
|
||||
&& intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0])
|
||||
&& intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
|
||||
&& intersectionPoints.get(btmRight).horizontal().equals(intersectionPoints.get(xPoint).horizontal())
|
||||
&& intersectionPoints.get(btmRight).vertical().equals(intersectionPoints.get(yPoint).vertical())) {
|
||||
|
||||
foundRectangles.add(new Rectangle2D.Double(topLeft.getX(), topLeft.getY(), btmRight.getX() - topLeft.getX(), btmRight.getY() - topLeft.getY()));
|
||||
break outer;
|
||||
}
|
||||
|
||||
@ -0,0 +1,201 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class RulingIntersectionFinder {
|
||||
|
||||
public static final int PERPENDICULAR_UNIT_EXPAND_AMOUNT = 2;
|
||||
|
||||
public static final Comparator<Point2D> Y_THEN_X_POINT_COMPARATOR = Comparator.comparingDouble(Point2D::getY).thenComparing(Point2D::getX);
|
||||
|
||||
|
||||
/**
|
||||
* Implementation to find line intersection in O(P + n log n), where n is the number of lines and P the numer of intersections
|
||||
* based on <a href="http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf">Segment Intersection by Piotr Indyk</a>
|
||||
* The algorithm assumes there are only horizontal and vertical lines which are unique in their coordinates. (E.g. no overlapping horizontal lines exist)
|
||||
* As a high level overview, the algorithm uses a sweep line advancing from left to right.
|
||||
* It dynamically updates the horizontal rulings which are intersected by the current sweep line.
|
||||
* When the sweep line hits a vertical line, it then checks for all intersections with the currently intersected horizontal rulings.
|
||||
* THe trick of the algorithm is using a binary search tree to store the currently intersected horizontal rulings. This way the lookup should be in O(log n).
|
||||
* This way the initial sorting step has the highest complexity class (O(n log n) and thus determines the complexity class of the entire algorithm
|
||||
*
|
||||
* Unfortunately, the implementation here takes a few liberties compared to the original algorithm. The binary search tree is replaced by an ordered Set which is simply looped over.
|
||||
* Therefore, this implementation's worst case, where all horizontal lines span the entire sweep, you are essentially performing the naive approach with a bunch of overhead.
|
||||
* Since we are using this implementation to find table cells, one can expect this worst case to always be the case.
|
||||
*
|
||||
* A simple runtime comparison for a single page with the most lines we can expect (SinglePages/AbsolutelyEnormousTable.pdf with 30 horizontals and 144 verticals) shows this implementation takes roughly 14 ms, whereas the naive approach takes 7 ms. Both are negligible, but the naive approach is two times as fast.
|
||||
*
|
||||
* If we would like to make this faster, we would need a better data structure for 'TreeMap<Ruling, Void> horizontalRulingsInCurrentSweep', where we can query the TreeMap for all horizontal rulings in a given interval in O(log n).
|
||||
*
|
||||
* @param horizontals a list of non-overlapping horizontal rulings
|
||||
* @param verticals a list of non-overlapping vertical rulings
|
||||
* @return a Map of each found intersection point pointing to the two lines forming the intersection.
|
||||
*/
|
||||
public Map<Point2D, IntersectingRulings> find(List<Ruling> horizontals, List<Ruling> verticals) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
List<SweepStep> sweepTrajectory = buildSweepTrajectory(horizontals, verticals);
|
||||
|
||||
TreeMap<Ruling, Void> horizontalRulingsInCurrentSweep = new TreeMap<>(Comparator.comparingDouble(Ruling::getTop));
|
||||
|
||||
TreeMap<Point2D, IntersectingRulings> intersections = new TreeMap<>(Y_THEN_X_POINT_COMPARATOR);
|
||||
|
||||
for (SweepStep step : sweepTrajectory) {
|
||||
switch (step.type) {
|
||||
case VERTICAL: // check for intersections with currently intersected horizontal lines
|
||||
for (Ruling horizontalRuling : horizontalRulingsInCurrentSweep.navigableKeySet()) {
|
||||
|
||||
Optional<Point2D> intersectionPoint = findIntersectionPoint(horizontalRuling, step.ruling);
|
||||
|
||||
if (intersectionPoint.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
intersections.put(intersectionPoint.get(), new IntersectingRulings(horizontalRuling, step.ruling));
|
||||
}
|
||||
break;
|
||||
case HORIZONTAL_ENTRY: // sweep line now intersects this horizontal ruling
|
||||
horizontalRulingsInCurrentSweep.put(step.ruling, null);
|
||||
break;
|
||||
case HORIZONTAL_EXIT: // sweep line no longer intersects this horizontal ruling
|
||||
horizontalRulingsInCurrentSweep.remove(step.ruling);
|
||||
break;
|
||||
}
|
||||
}
|
||||
log.debug("Finished building intersections with line sweep in {} ms", System.currentTimeMillis() - start);
|
||||
|
||||
return intersections;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Naive Approach in O(n^2) of finding intersections between lines by iterating over all lines.
|
||||
*
|
||||
* @param horizontals a list of non-overlapping horizontal rulings
|
||||
* @param verticals a list of non-overlapping vertical rulings
|
||||
* @return a Map of each found intersection point pointing to the two lines forming the intersection.
|
||||
*/
|
||||
public Map<Point2D, IntersectingRulings> findNaive(List<Ruling> horizontals, List<Ruling> verticals) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
TreeMap<Point2D, IntersectingRulings> intersections = new TreeMap<>(Y_THEN_X_POINT_COMPARATOR);
|
||||
|
||||
for (Ruling horizontal : horizontals) {
|
||||
for (Ruling vertical : verticals) {
|
||||
Optional<Point2D> intersectionPoint = findIntersectionPoint(horizontal, vertical);
|
||||
|
||||
if (intersectionPoint.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
intersections.put(intersectionPoint.get(), new IntersectingRulings(horizontal, vertical));
|
||||
}
|
||||
}
|
||||
log.debug("Finished building intersections naively in {} ms", System.currentTimeMillis() - start);
|
||||
|
||||
return intersections;
|
||||
}
|
||||
|
||||
|
||||
private static List<SweepStep> buildSweepTrajectory(List<Ruling> horizontals, List<Ruling> verticals) {
|
||||
|
||||
List<SweepStep> sweepTrajectory = new LinkedList<>();
|
||||
|
||||
for (Ruling horizontalRuling : horizontals) {
|
||||
sweepTrajectory.add(new SweepStep(SweepStep.Type.HORIZONTAL_ENTRY, horizontalRuling.getLeft() - PERPENDICULAR_UNIT_EXPAND_AMOUNT, horizontalRuling));
|
||||
sweepTrajectory.add(new SweepStep(SweepStep.Type.HORIZONTAL_EXIT, horizontalRuling.getRight() + PERPENDICULAR_UNIT_EXPAND_AMOUNT, horizontalRuling));
|
||||
}
|
||||
|
||||
for (Ruling verticalRuling : verticals) {
|
||||
sweepTrajectory.add(new SweepStep(SweepStep.Type.VERTICAL, verticalRuling.getLeft(), verticalRuling));
|
||||
}
|
||||
|
||||
Collections.sort(sweepTrajectory);
|
||||
|
||||
return sweepTrajectory;
|
||||
}
|
||||
|
||||
|
||||
public Optional<Point2D> findIntersectionPoint(Ruling horizontal, Ruling vertical) {
|
||||
|
||||
if (!horizontal.isHorizontal() || !vertical.isVertical()) {
|
||||
log.warn("lines must be orthogonal, vertical and horizontal");
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
Ruling expanded_horizontal = horizontal.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT);
|
||||
Ruling expanded_vertical = vertical.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT);
|
||||
|
||||
if (!expanded_horizontal.intersectsLine(expanded_vertical)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return Optional.of(new Point2D.Float(vertical.getLeft(), horizontal.getTop()));
|
||||
}
|
||||
|
||||
|
||||
private class SweepStep implements Comparable<SweepStep> {
|
||||
|
||||
protected Type type;
|
||||
protected float y_position;
|
||||
protected Ruling ruling;
|
||||
|
||||
private enum Type {
|
||||
VERTICAL,
|
||||
HORIZONTAL_EXIT,
|
||||
HORIZONTAL_ENTRY
|
||||
}
|
||||
|
||||
|
||||
public SweepStep(Type type, float y_position, Ruling ruling) {
|
||||
|
||||
this.type = type;
|
||||
this.y_position = y_position;
|
||||
this.ruling = ruling;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(SweepStep other) {
|
||||
|
||||
int rv;
|
||||
if (DoubleComparisons.feq(y_position, other.y_position)) {
|
||||
if (type == SweepStep.Type.VERTICAL && other.type == SweepStep.Type.HORIZONTAL_ENTRY) {
|
||||
rv = 1;
|
||||
} else if (type == SweepStep.Type.VERTICAL && other.type == SweepStep.Type.HORIZONTAL_EXIT) {
|
||||
rv = -1;
|
||||
} else if (type == SweepStep.Type.HORIZONTAL_ENTRY && other.type == SweepStep.Type.VERTICAL) {
|
||||
rv = -1;
|
||||
} else if (type == SweepStep.Type.HORIZONTAL_EXIT && other.type == SweepStep.Type.VERTICAL) {
|
||||
rv = 1;
|
||||
} else {
|
||||
rv = Double.compare(y_position, other.y_position);
|
||||
}
|
||||
} else {
|
||||
return Double.compare(y_position, other.y_position);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public record IntersectingRulings(Ruling horizontal, Ruling vertical) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,252 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.visualization;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Line;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Zone;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.PlacedText;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.Standard14EmbeddableFont;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class LayoutparsingVisualizations {
|
||||
|
||||
static final Standard14EmbeddableFont FONT = Standard14EmbeddableFont.helvetica();
|
||||
|
||||
static final Color WORDS_COLOR = new Color(68, 84, 147);
|
||||
static final Color LINES_COLOR = new Color(152, 45, 179);
|
||||
static final Color ZONES_COLOR = new Color(131, 38, 38);
|
||||
static final Color RULINGS_COLOR = new Color(21, 221, 174);
|
||||
static final Color CELLS_COLOR = new Color(31, 214, 27);
|
||||
static final Color MAIN_BODY_COLOR = new Color(171, 131, 6);
|
||||
static final Color MARKED_CONTENT_COLOR = new Color(171, 131, 6);
|
||||
|
||||
static final List<Color> ROTATING_CHARACTER_COLOR = List.of(new Color(255, 87, 51),
|
||||
new Color(255, 195, 0),
|
||||
new Color(76, 175, 80),
|
||||
new Color(33, 150, 243),
|
||||
new Color(155, 89, 182),
|
||||
new Color(233, 30, 99),
|
||||
new Color(0, 188, 212),
|
||||
new Color(121, 85, 72));
|
||||
|
||||
@Setter
|
||||
boolean active = false;
|
||||
|
||||
final Visualizations words = Visualizations.builder().layer(ContentStreams.WORDS).build();
|
||||
final Visualizations lines = Visualizations.builder().layer(ContentStreams.LINES).build();
|
||||
final Visualizations zones = Visualizations.builder().layer(ContentStreams.ZONES).build();
|
||||
final Visualizations mainBody = Visualizations.builder().layer(ContentStreams.MAIN_BODY).build();
|
||||
final Visualizations rulings = Visualizations.builder().layer(ContentStreams.RULINGS).build();
|
||||
final Visualizations cells = Visualizations.builder().layer(ContentStreams.CELLS).build();
|
||||
final Visualizations markedContent = Visualizations.builder().layer(ContentStreams.MARKED_CONTENT).build();
|
||||
final Visualizations neighbours = Visualizations.builder().layer(ContentStreams.NEIGHBOURS).build();
|
||||
final Visualizations characters = Visualizations.builder().layer(ContentStreams.CHARACTERS).build();
|
||||
|
||||
|
||||
public Stream<Visualizations> streamAll() {
|
||||
|
||||
if (!active) {
|
||||
return Stream.empty();
|
||||
}
|
||||
return Stream.of(characters, //
|
||||
neighbours,//
|
||||
words, //
|
||||
lines, //
|
||||
zones, //
|
||||
rulings, //
|
||||
cells, //
|
||||
mainBody, //
|
||||
markedContent //
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
public void addTextVisualizations(List<TextPositionSequence> textPositionSequences, int pageNumber) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
List<ColoredRectangle> list = textPositionSequences.stream()
|
||||
.map(textPositionSequence -> textPositionSequence.getTextPositions()
|
||||
.stream()
|
||||
.map(RedTextPosition::getInitialUserSpacePosition)
|
||||
.collect(RectangleTransformations.collectBBox()))
|
||||
.map(rect -> new ColoredRectangle(rect, WORDS_COLOR, 1))
|
||||
.toList();
|
||||
this.words.getVisualizationsOnPages().put(pageNumber - 1, VisualizationsOnPage.builder().coloredRectangles(list).build());
|
||||
}
|
||||
|
||||
|
||||
public void addCleanRulingVisualization(CleanRulings cleanRulings, int pageNumber) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
this.rulings.getVisualizationsOnPages()
|
||||
.put(pageNumber - 1,
|
||||
VisualizationsOnPage.builder()
|
||||
.coloredLines(Stream.of(cleanRulings.getHorizontal(), cleanRulings.getVertical())
|
||||
.flatMap(Collection::stream)
|
||||
.map(ruling -> new ColoredLine(ruling, RULINGS_COLOR, 1))
|
||||
.toList())
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
public void addCellVisualizations(List<? extends Rectangle2D> cells, int pageNumber) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
this.cells.getVisualizationsOnPages()
|
||||
.put(pageNumber - 1,
|
||||
VisualizationsOnPage.builder()
|
||||
.coloredRectangles(cells.stream()
|
||||
.map(ruling -> new ColoredRectangle(ruling, CELLS_COLOR, 1))
|
||||
.toList())
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
public void addZoneVisualizations(List<Zone> zones, int page) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.zones.getVisualizationsOnPages()
|
||||
.put(page - 1,
|
||||
VisualizationsOnPage.builder()
|
||||
.coloredRectangles(zones.stream()
|
||||
.map(BoundingBox::getBBox)
|
||||
.map(zone -> new ColoredRectangle(zone, ZONES_COLOR, 1))
|
||||
.toList())
|
||||
.build());
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void addLineVisualizations(List<Zone> zones, int page) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
this.lines.getVisualizationsOnPages()
|
||||
.put(page - 1,
|
||||
VisualizationsOnPage.builder()
|
||||
.coloredRectangles(zones.stream()
|
||||
.map(Zone::getLines)
|
||||
.flatMap(Collection::stream)
|
||||
.map(BoundingBox::getBBox)
|
||||
.map(line -> new ColoredRectangle(line, LINES_COLOR, 1))
|
||||
.toList())
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
public void addMainBodyVisualization(Rectangle rectangle, int pageNumber) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
this.mainBody.getVisualizationsOnPages()
|
||||
.put(pageNumber - 1,
|
||||
VisualizationsOnPage.builder()
|
||||
.coloredRectangles(List.of(new ColoredRectangle(new Rectangle2D.Double(rectangle.getTopLeft().getX(),
|
||||
rectangle.getTopLeft().getY(),
|
||||
rectangle.getWidth(),
|
||||
rectangle.getHeight()), MAIN_BODY_COLOR, 1)))
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
public void addMarkedContentVisualizations(List<PDMarkedContent> markedContents, int pageNumber, PDPage pdPage) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
List<MarkedContentUtils.MarkedContentPosition> markedContentBBoxMapBySubType = MarkedContentUtils.getMarkedContentPositions(markedContents, pdPage);
|
||||
VisualizationsOnPage visualizationsOnPage = VisualizationsOnPage.builder().build();
|
||||
this.markedContent.getVisualizationsOnPages().put(pageNumber - 1, visualizationsOnPage);
|
||||
markedContentBBoxMapBySubType.forEach(markedContentPosition -> {
|
||||
|
||||
var bbox = markedContentPosition.textPositions()
|
||||
.stream()
|
||||
.collect(RectangleTransformations.collectBBox());
|
||||
String type = markedContentPosition.formattedType();
|
||||
float translationAmount = ((FONT.getStringWidth(type) / 1000) * 10 + (2 * 1) + 4);
|
||||
visualizationsOnPage.getPlacedTexts()
|
||||
.add(PlacedText.textFacingUp(type, new Point2D.Double(bbox.getX() - translationAmount, bbox.getY() + bbox.getHeight()), 10, Color.BLACK, FONT));
|
||||
|
||||
visualizationsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox, MARKED_CONTENT_COLOR, 1));
|
||||
}
|
||||
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void addCharactersWithNeighbours(List<Zone> zones, int page) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
|
||||
VisualizationsOnPage neighbourVisualizations = VisualizationsOnPage.builder().build();
|
||||
neighbours.getVisualizationsOnPages().put(page - 1, neighbourVisualizations);
|
||||
VisualizationsOnPage characterVisualizations = VisualizationsOnPage.builder().build();
|
||||
characters.getVisualizationsOnPages().put(page - 1, characterVisualizations);
|
||||
|
||||
AtomicInteger index = new AtomicInteger(0);
|
||||
zones.forEach(zone -> zone.getLines()
|
||||
.stream()
|
||||
.map(Line::getCharacters)
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(character -> {
|
||||
Color color = ROTATING_CHARACTER_COLOR.get(index.getAndIncrement() % ROTATING_CHARACTER_COLOR.size());
|
||||
Rectangle2D charBBox = character.getTextPosition().getInitialUserSpacePosition();
|
||||
characterVisualizations.getColoredRectangles().add(new ColoredRectangle(charBBox, color, 1));
|
||||
character.getNeighbors()
|
||||
.forEach(neighbor -> {
|
||||
Rectangle2D neighborBBox = neighbor.getCharacter().getTextPosition().getInitialUserSpacePosition();
|
||||
Line2D line = new Line2D.Double(new Point2D.Double(charBBox.getCenterX(), charBBox.getCenterY()),
|
||||
new Point2D.Double(neighborBBox.getCenterX(), neighborBBox.getCenterY()));
|
||||
neighbourVisualizations.getColoredLines().add(new ColoredLine(line, color, 1));
|
||||
});
|
||||
}));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,10 +1,20 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
@ -22,26 +32,63 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testLayoutParserEndToEnd() {
|
||||
|
||||
prepareStorage("files/bdr/Wie weiter bei Kristeneinrichtungen.pdf");
|
||||
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
|
||||
LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
|
||||
Arrays.stream(finishedEvent.message().split("\n"))
|
||||
.forEach(log::info);
|
||||
String filePath = "files/bdr/Wie weiter bei Kristeneinrichtungen.pdf";
|
||||
|
||||
runForFile(filePath);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
@SneakyThrows
|
||||
public void testLayoutParserEndToEndWithFolder() {
|
||||
|
||||
String folder = "/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles-pdftron-ocred";
|
||||
List<Path> pdfFiles = Files.walk(Path.of(folder))
|
||||
.filter(path -> path.getFileName().toString().endsWith(".pdf"))
|
||||
.sorted(Comparator.comparing(Path::getFileName))
|
||||
.peek(System.out::println)
|
||||
.toList();
|
||||
|
||||
System.out.printf("Found %d pdf files to process %n", pdfFiles.size());
|
||||
AtomicInteger count = new AtomicInteger(0);
|
||||
pdfFiles.stream()
|
||||
.peek(path -> log.info("{}/{}-{}", count.getAndIncrement(), pdfFiles.size(), path.getFileName()))
|
||||
.forEach(path -> runForFile(path.toFile().toString()));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testLayoutParserEndToEnd_RED_8747() {
|
||||
private void runForFile(String filePath) {
|
||||
|
||||
String fileName = Path.of(filePath).getFileName().toString();
|
||||
File file;
|
||||
if (filePath.startsWith("files")) { // from resources
|
||||
file = new ClassPathResource(filePath).getFile();
|
||||
} else { // absolute path
|
||||
file = new File(filePath);
|
||||
}
|
||||
|
||||
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(fileName, LayoutParsingType.REDACT_MANAGER, true);
|
||||
prepareStorage(layoutParsingRequest, file);
|
||||
|
||||
prepareStorage("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf");
|
||||
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
|
||||
LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
|
||||
|
||||
Arrays.stream(finishedEvent.message().split("\n"))
|
||||
.forEach(log::info);
|
||||
|
||||
File tmpFile = new File("/tmp/layout-E2E/" + fileName + "_VIEWER.pdf");
|
||||
assert tmpFile.getParentFile().exists() || tmpFile.getParentFile().mkdirs();
|
||||
|
||||
storageService.downloadTo(TENANT_ID, layoutParsingRequest.viewerDocumentStorageId(), tmpFile);
|
||||
}
|
||||
|
||||
|
||||
@AfterEach
|
||||
public void cleanUpTmp() {
|
||||
|
||||
((FileSystemBackedStorageService) storageService).clearStorage();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -23,6 +23,10 @@ import lombok.SneakyThrows;
|
||||
|
||||
public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
|
||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
@ -31,11 +35,9 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
|
||||
var documentFile = new ClassPathResource(fileName).getFile();
|
||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
Document document = buildGraph(fileName, LayoutParsingType.DOCUMINE);
|
||||
Document document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER);
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
||||
System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
|
||||
}
|
||||
@ -55,11 +57,11 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
var documentFile = new ClassPathResource(fileName).getFile();
|
||||
|
||||
var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE,
|
||||
documentFile,
|
||||
new ImageServiceResponse(),
|
||||
tableResponse,
|
||||
new VisualLayoutParsingResponse(),
|
||||
Map.of("file", Path.of(fileName).getFileName().toFile().toString()));
|
||||
documentFile,
|
||||
new ImageServiceResponse(),
|
||||
tableResponse,
|
||||
new VisualLayoutParsingResponse(),
|
||||
Map.of("file", Path.of(fileName).getFileName().toFile().toString()));
|
||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
Document document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE, classificationDocument);
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
@ -102,29 +105,22 @@ public abstract class AbstractTest {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected LayoutParsingRequest prepareStorage(InputStream fileInputStream) {
|
||||
|
||||
storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileInputStream);
|
||||
return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
|
||||
}
|
||||
|
||||
|
||||
protected LayoutParsingRequest buildDefaultLayoutParsingRequest(LayoutParsingType layoutParsingType) {
|
||||
protected LayoutParsingRequest buildDefaultLayoutParsingRequest(String fileName, LayoutParsingType layoutParsingType, boolean debug) {
|
||||
|
||||
var identifier = debug ? Map.of("fileId", fileName, "debug", "true") : Map.of("fileId", fileName);
|
||||
return LayoutParsingRequest.builder()
|
||||
.identifier(Map.of("fileId", "1337"))
|
||||
.identifier(identifier)
|
||||
.layoutParsingType(layoutParsingType)
|
||||
.originFileStorageId(ORIGIN_FILE_ID)
|
||||
.tablesFileStorageId(Optional.of(TABLE_FILE_ID))
|
||||
.imagesFileStorageId(Optional.of(IMAGE_FILE_ID))
|
||||
.visualLayoutParsingFileId(Optional.of(VISUAL_LAYOUT_FILE))
|
||||
.structureFileStorageId(STRUCTURE_FILE_ID)
|
||||
.textBlockFileStorageId(TEXT_FILE_ID)
|
||||
.positionBlockFileStorageId(POSITION_FILE_ID)
|
||||
.pageFileStorageId(PAGES_FILE_ID)
|
||||
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
||||
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
|
||||
.originFileStorageId(fileName + ORIGIN_FILE_ID)
|
||||
.tablesFileStorageId(Optional.of(fileName + TABLE_FILE_ID))
|
||||
.imagesFileStorageId(Optional.of(fileName + IMAGE_FILE_ID))
|
||||
.visualLayoutParsingFileId(Optional.of(fileName + VISUAL_LAYOUT_FILE))
|
||||
.structureFileStorageId(fileName + STRUCTURE_FILE_ID)
|
||||
.textBlockFileStorageId(fileName + TEXT_FILE_ID)
|
||||
.positionBlockFileStorageId(fileName + POSITION_FILE_ID)
|
||||
.pageFileStorageId(fileName + PAGES_FILE_ID)
|
||||
.simplifiedTextStorageId(fileName + SIMPLIFIED_ID)
|
||||
.viewerDocumentStorageId(fileName + VIEWER_DOCUMENT_ID)
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -148,10 +144,28 @@ public abstract class AbstractTest {
|
||||
ClassPathResource imageInfoFileResource = new ClassPathResource(imageInfoFile);
|
||||
ClassPathResource visualLayoutParsingResponseResource = new ClassPathResource(visualLayoutParsingResponseFile);
|
||||
|
||||
return prepareStorage(pdfFileResource.getInputStream(),
|
||||
cvServiceResponseFileResource.getInputStream(),
|
||||
imageInfoFileResource.getInputStream(),
|
||||
visualLayoutParsingResponseResource.getInputStream());
|
||||
return prepareStorage(Path.of(file).getFileName().toString(),
|
||||
pdfFileResource.getInputStream(),
|
||||
cvServiceResponseFileResource.getInputStream(),
|
||||
imageInfoFileResource.getInputStream(),
|
||||
visualLayoutParsingResponseResource.getInputStream());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected void prepareStorage(LayoutParsingRequest layoutParsingRequest, File file) {
|
||||
|
||||
ClassPathResource cvServiceResponseFileResource = new ClassPathResource("cv_table_parsing_response/empty.json");
|
||||
ClassPathResource imageInfoFileResource = new ClassPathResource("image_service_response/empty.json");
|
||||
ClassPathResource visualLayoutParsingResponseResource = new ClassPathResource("visual_layout_parsing_response/empty.json");
|
||||
|
||||
try (var in = new FileInputStream(file)) {
|
||||
prepareStorage(layoutParsingRequest,
|
||||
in,
|
||||
cvServiceResponseFileResource.getInputStream(),
|
||||
imageInfoFileResource.getInputStream(),
|
||||
visualLayoutParsingResponseResource.getInputStream());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -162,12 +176,27 @@ public abstract class AbstractTest {
|
||||
storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream);
|
||||
|
||||
return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
|
||||
return buildDefaultLayoutParsingRequest("test", LayoutParsingType.REDACT_MANAGER_OLD, true);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected LayoutParsingRequest prepareStorage(InputStream fileStream,
|
||||
protected void prepareStorage(LayoutParsingRequest layoutParsingRequest,
|
||||
InputStream fileStream,
|
||||
InputStream cvServiceResponseFileStream,
|
||||
InputStream imageInfoStream,
|
||||
InputStream visualLayoutParsingResponseFileStream) {
|
||||
|
||||
storageService.storeObject(TenantContext.getTenantId(), layoutParsingRequest.imagesFileStorageId().get(), imageInfoStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), layoutParsingRequest.tablesFileStorageId().get(), cvServiceResponseFileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), layoutParsingRequest.originFileStorageId(), fileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), layoutParsingRequest.visualLayoutParsingFileId().get(), visualLayoutParsingResponseFileStream);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected LayoutParsingRequest prepareStorage(String fileName,
|
||||
InputStream fileStream,
|
||||
InputStream cvServiceResponseFileStream,
|
||||
InputStream imageInfoStream,
|
||||
InputStream visualLayoutParsingResponseFileStream) {
|
||||
@ -177,7 +206,7 @@ public abstract class AbstractTest {
|
||||
storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), VISUAL_LAYOUT_FILE, visualLayoutParsingResponseFileStream);
|
||||
|
||||
return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
|
||||
return buildDefaultLayoutParsingRequest(fileName, LayoutParsingType.REDACT_MANAGER_OLD, true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,11 +1,13 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
@ -28,11 +30,11 @@ public abstract class BuildDocumentTest extends AbstractTest {
|
||||
File fileResource = new ClassPathResource(filename).getFile();
|
||||
prepareStorage(filename);
|
||||
return layoutParsingPipeline.parseLayout(layoutParsingType,
|
||||
fileResource,
|
||||
layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
Map.of("file",filename));
|
||||
fileResource,
|
||||
layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
Map.of("file", filename, "debug", "true"));
|
||||
}
|
||||
|
||||
|
||||
@ -46,13 +48,25 @@ public abstract class BuildDocumentTest extends AbstractTest {
|
||||
@SneakyThrows
|
||||
protected Document buildGraph(String filename, LayoutParsingType layoutParsingType) {
|
||||
|
||||
if (filename.equals("files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
|
||||
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||
if (!filename.startsWith("files") && filename.startsWith("/")) {
|
||||
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(Path.of(filename).getFileName().toString(), LayoutParsingType.REDACT_MANAGER, true);
|
||||
prepareStorage(layoutParsingRequest, new File(filename));
|
||||
return DocumentGraphFactory.buildDocumentGraph(layoutParsingType,
|
||||
layoutParsingPipeline.parseLayout(layoutParsingType,
|
||||
new File(filename),
|
||||
layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get()),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
layoutParsingRequest.identifier()));
|
||||
} else {
|
||||
prepareStorage(filename);
|
||||
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
|
||||
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||
} else {
|
||||
prepareStorage(filename);
|
||||
}
|
||||
return DocumentGraphFactory.buildDocumentGraph(layoutParsingType, parseLayout(filename, layoutParsingType));
|
||||
}
|
||||
|
||||
return DocumentGraphFactory.buildDocumentGraph(layoutParsingType, parseLayout(filename, layoutParsingType));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Binary file not shown.
@ -26,6 +26,23 @@ public class ContentStreams {
|
||||
|
||||
// NOTE(review): the display name is "escape start" although this identifier is ESCAPE_END - looks like a copy-paste slip; confirm before changing, since the name is part of the record's value.
public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false);
|
||||
|
||||
public static Identifier RULINGS = new Identifier("Rulings", COSName.getPDFName("KNECON_RULINGS"), true);
|
||||
|
||||
public static Identifier WORDS = new Identifier("Words", COSName.getPDFName("KNECON_WORDS"), true);
|
||||
|
||||
public static Identifier ZONES = new Identifier("Text Zones", COSName.getPDFName("KNECON_ZONES"), true);
|
||||
|
||||
public static Identifier LINES = new Identifier("Text Lines", COSName.getPDFName("KNECON_LINES"), true);
|
||||
|
||||
public static Identifier CELLS = new Identifier("Cells", COSName.getPDFName("KNECON_CELLS"), true);
|
||||
|
||||
public static Identifier MAIN_BODY = new Identifier("Main Text Body", COSName.getPDFName("KNECON_MAIN_BODY"), true);
|
||||
|
||||
public static Identifier MARKED_CONTENT = new Identifier("Marked content", COSName.getPDFName("KNECON_MARKED_CONTENT"), true);
|
||||
|
||||
public static Identifier NEIGHBOURS = new Identifier("Neighbours", COSName.getPDFName("KNECON_NEIGHBOURS"), true);
|
||||
public static Identifier CHARACTERS = new Identifier("Characters", COSName.getPDFName("KNECON_CHARACTERS"), true);
|
||||
|
||||
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT,
|
||||
KNECON_VISUAL_PARSING,
|
||||
KNECON_OCR,
|
||||
@ -33,7 +50,16 @@ public class ContentStreams {
|
||||
KNECON_OCR_TEXT_DEBUG,
|
||||
OTHER,
|
||||
ESCAPE_START,
|
||||
ESCAPE_END);
|
||||
ESCAPE_END,
|
||||
RULINGS,
|
||||
WORDS,
|
||||
ZONES,
|
||||
LINES,
|
||||
MAIN_BODY,
|
||||
MARKED_CONTENT,
|
||||
NEIGHBOURS,
|
||||
CHARACTERS,
|
||||
CELLS);
|
||||
|
||||
public record Identifier(String name, COSName cosName, boolean optionalContent) {
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.knecon.fforesight.service.viewerdoc.model;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
|
||||
@ -17,7 +18,8 @@ import lombok.experimental.FieldDefaults;
|
||||
public class Visualizations {
|
||||
|
||||
ContentStreams.Identifier layer;
|
||||
Map<Integer, VisualizationsOnPage> visualizationsOnPages;
|
||||
@Builder.Default
|
||||
Map<Integer, VisualizationsOnPage> visualizationsOnPages = new LinkedHashMap<>();
|
||||
boolean layerVisibilityDefaultValue;
|
||||
|
||||
}
|
||||
|
||||
@ -53,12 +53,6 @@ public class ViewerDocumentService {
|
||||
private final ObservationRegistry registry;
|
||||
|
||||
|
||||
public void addVisualizationsOnPage(File originFile, File destinationFile, Visualizations visualizations) {
|
||||
|
||||
addVisualizationsOnPage(originFile, destinationFile, List.of(visualizations));
|
||||
}
|
||||
|
||||
|
||||
@Observed(name = "ViewerDocumentService", contextualName = "add-visualizations")
|
||||
@SneakyThrows
|
||||
public void addVisualizationsOnPage(File originFile, File destinationFile, List<Visualizations> visualizations) {
|
||||
@ -70,9 +64,14 @@ public class ViewerDocumentService {
|
||||
|
||||
PDDocument pdDocument = openPDDocument(tmpFile.toFile());
|
||||
|
||||
enrichObservation(pdDocument, visualizations.stream().map(Visualizations::getLayer).toList());
|
||||
enrichObservation(pdDocument,
|
||||
visualizations.stream()
|
||||
.map(Visualizations::getLayer)
|
||||
.toList());
|
||||
|
||||
Set<ContentStreams.Identifier> allLayers = visualizations.stream().map(Visualizations::getLayer).collect(Collectors.toUnmodifiableSet());
|
||||
Set<ContentStreams.Identifier> allLayers = visualizations.stream()
|
||||
.map(Visualizations::getLayer)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
Map<ContentStreams.Identifier, PDOptionalContentGroup> optionalContentGroupMap = addLayersToDocument(visualizations, pdDocument);
|
||||
|
||||
@ -186,7 +185,8 @@ public class ViewerDocumentService {
|
||||
contentStream.setFont(font, placedText.fontSize());
|
||||
contentStream.beginText();
|
||||
contentStream.setNonStrokingColor(placedText.color());
|
||||
if (placedText.renderingMode().isPresent()) {
|
||||
if (placedText.renderingMode()
|
||||
.isPresent()) {
|
||||
contentStream.setRenderingMode(placedText.renderingMode().get());
|
||||
} else {
|
||||
contentStream.setRenderingMode(RenderingMode.FILL);
|
||||
@ -229,11 +229,11 @@ public class ViewerDocumentService {
|
||||
Matrix textMatrix;
|
||||
if (placedText.textMatrix().isEmpty()) {
|
||||
textMatrix = new Matrix((float) textDeRotationMatrix.getScaleX(),
|
||||
(float) textDeRotationMatrix.getShearX(),
|
||||
(float) textDeRotationMatrix.getShearY(),
|
||||
(float) textDeRotationMatrix.getScaleY(),
|
||||
(float) placedText.lineStart().getX(),
|
||||
(float) placedText.lineStart().getY());
|
||||
(float) textDeRotationMatrix.getShearX(),
|
||||
(float) textDeRotationMatrix.getShearY(),
|
||||
(float) textDeRotationMatrix.getScaleY(),
|
||||
(float) placedText.lineStart().getX(),
|
||||
(float) placedText.lineStart().getY());
|
||||
} else {
|
||||
textMatrix = placedText.textMatrix().get();
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user