Merge branch 'RED-8826' into 'main'
Red 8826 See merge request fforesight/layout-parser!138
This commit is contained in:
commit
58acbab85f
@ -27,6 +27,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBl
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
@ -51,6 +52,7 @@ import com.knecon.fforesight.service.layoutparser.processor.services.classificat
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.classification.DocuMineClassificationService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.graphics.GraphicExtractorService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.parsing.PDFLinesTextStripper;
|
||||
@ -90,6 +92,7 @@ public class LayoutParsingPipeline {
|
||||
ObservationRegistry observationRegistry;
|
||||
VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
||||
ClarifyndClassificationService clarifyndClassificationService;
|
||||
GraphicExtractorService graphicExtractorService;
|
||||
|
||||
|
||||
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
||||
@ -256,9 +259,21 @@ public class LayoutParsingPipeline {
|
||||
|
||||
List<Cell> emptyTableCells = TableExtractionService.findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
|
||||
var graphics = graphicExtractorService.extractPathElementGraphics(originDocument,
|
||||
pdPage,
|
||||
pageNumber,
|
||||
cleanRulings,
|
||||
stripper.getTextPositionSequences(),
|
||||
emptyTableCells,
|
||||
false);
|
||||
|
||||
pdfImages.computeIfAbsent(pageNumber, x -> new ArrayList<>())
|
||||
.addAll(graphics.stream()
|
||||
.map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), ImageType.GRAPHIC, false, stripper.getPageNumber()))
|
||||
.toList());
|
||||
|
||||
ClassificationPage classificationPage = switch (layoutParsingType) {
|
||||
case REDACT_MANAGER_OLD ->
|
||||
redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells);
|
||||
case REDACT_MANAGER_OLD -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells);
|
||||
case DOCUMINE -> docuMineBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, true);
|
||||
case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, false);
|
||||
|
||||
@ -9,7 +9,8 @@ public enum ImageType {
|
||||
|
||||
SIGNATURE_VISUAL,
|
||||
OTHER,
|
||||
OCR;
|
||||
OCR,
|
||||
GRAPHIC;
|
||||
|
||||
|
||||
public static ImageType fromString(String imageType) {
|
||||
@ -19,6 +20,7 @@ public enum ImageType {
|
||||
case "formula" -> ImageType.FORMULA;
|
||||
case "signature" -> ImageType.SIGNATURE;
|
||||
case "ocr" -> ImageType.OCR;
|
||||
case "graphic" -> ImageType.GRAPHIC;
|
||||
default -> ImageType.OTHER;
|
||||
};
|
||||
}
|
||||
|
||||
@ -9,10 +9,10 @@ import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@ -20,8 +20,7 @@ import lombok.RequiredArgsConstructor;
|
||||
@RequiredArgsConstructor
|
||||
public class ImageServiceResponseAdapter {
|
||||
|
||||
|
||||
public Map<Integer, List<ClassifiedImage>> buildClassifiedImagesPerPage(ImageServiceResponse imageServiceResponse ) {
|
||||
public Map<Integer, List<ClassifiedImage>> buildClassifiedImagesPerPage(ImageServiceResponse imageServiceResponse) {
|
||||
|
||||
Map<Integer, List<ClassifiedImage>> images = new HashMap<>();
|
||||
imageServiceResponse.getData().forEach(imageMetadata -> {
|
||||
|
||||
@ -3,14 +3,15 @@ package com.knecon.fforesight.service.layoutparser.processor.services.classifica
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.PositionUtils;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -21,7 +22,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class RedactManagerClassificationService {
|
||||
|
||||
|
||||
public void classifyDocument(ClassificationDocument document) {
|
||||
|
||||
List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();
|
||||
@ -52,14 +52,25 @@ public class RedactManagerClassificationService {
|
||||
textBlock.setClassification(PageBlockType.OTHER);
|
||||
return;
|
||||
}
|
||||
if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER)
|
||||
|| PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.getRotation()) && (document.getFontSizeCounter()
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
|
||||
if (page.getImages()
|
||||
.stream()
|
||||
.filter(image -> image.getImageType().equals(ImageType.GRAPHIC))
|
||||
.anyMatch(graphic -> graphic.getPosition().intersects(textBlock.getPdfMinX(), textBlock.getPdfMinY(), textBlock.getWidth(), textBlock.getHeight()))) {
|
||||
textBlock.setClassification(PageBlockType.PARAGRAPH);
|
||||
return;
|
||||
}
|
||||
|
||||
if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER) || PositionUtils.isOverBodyTextFrame(bodyTextFrame,
|
||||
textBlock,
|
||||
page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
|
||||
.getMostPopular())) {
|
||||
textBlock.setClassification(PageBlockType.HEADER);
|
||||
|
||||
} else if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER)
|
||||
|| PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock, page.getRotation()) && (document.getFontSizeCounter()
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
} else if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER) || PositionUtils.isUnderBodyTextFrame(bodyTextFrame,
|
||||
textBlock,
|
||||
page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
|
||||
.getMostPopular())) {
|
||||
textBlock.setClassification(PageBlockType.FOOTER);
|
||||
} else if (page.getPageNumber() == 1 && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
|
||||
document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks()
|
||||
|
||||
@ -8,10 +8,10 @@ import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.Boundary;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.Boundary;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@ -110,6 +110,7 @@ public class SearchTextWithTextPositionFactory {
|
||||
return context.stringIdx - context.lastHyphenIdx < MAX_HYPHEN_LINEBREAK_DISTANCE;
|
||||
}
|
||||
|
||||
|
||||
private static List<Boundary> mergeToBoundaries(List<Integer> integers) {
|
||||
|
||||
if (integers.isEmpty()) {
|
||||
@ -125,8 +126,9 @@ public class SearchTextWithTextPositionFactory {
|
||||
}
|
||||
end = current + 1;
|
||||
}
|
||||
if (boundaries.isEmpty())
|
||||
if (boundaries.isEmpty()) {
|
||||
boundaries.add(new Boundary(start, end));
|
||||
}
|
||||
return boundaries;
|
||||
}
|
||||
|
||||
@ -138,6 +140,7 @@ public class SearchTextWithTextPositionFactory {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private boolean isLineBreak(RedTextPosition currentTextPosition, RedTextPosition previousTextPosition) {
|
||||
|
||||
return Objects.equals(currentTextPosition.getUnicode(), "\n") || isDeltaYLargerThanTextHeight(currentTextPosition, previousTextPosition);
|
||||
@ -177,7 +180,7 @@ public class SearchTextWithTextPositionFactory {
|
||||
}
|
||||
|
||||
|
||||
private Rectangle2D mapRedTextPositionToInitialUserSpace(RedTextPosition textPosition, TextPositionSequence sequence) {
|
||||
public Rectangle2D mapRedTextPositionToInitialUserSpace(RedTextPosition textPosition, TextPositionSequence sequence) {
|
||||
|
||||
float textHeight = sequence.getTextHeight() + HEIGHT_PADDING;
|
||||
Rectangle2D rectangle2D = new Rectangle2D.Double(textPosition.getXDirAdj(),
|
||||
|
||||
@ -0,0 +1,162 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.graphics;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
public class Box {
|
||||
|
||||
public double x1;
|
||||
public double y1;
|
||||
public double x2;
|
||||
public double y2;
|
||||
|
||||
|
||||
public Box(double x1, double y1, double x2, double y2) {
|
||||
|
||||
this.x1 = x1;
|
||||
this.y1 = y1;
|
||||
this.x2 = x2;
|
||||
this.y2 = y2;
|
||||
}
|
||||
|
||||
|
||||
public Box(Rectangle2D rectangle2D) {
|
||||
|
||||
this.x1 = rectangle2D.getMinX();
|
||||
this.y1 = rectangle2D.getMinY();
|
||||
this.x2 = rectangle2D.getMaxX();
|
||||
this.y2 = rectangle2D.getMaxY();
|
||||
}
|
||||
|
||||
|
||||
public double width() {
|
||||
|
||||
return x2 - x1;
|
||||
}
|
||||
|
||||
|
||||
public double height() {
|
||||
|
||||
return y2 - y1;
|
||||
}
|
||||
|
||||
|
||||
public double xCenter() {
|
||||
|
||||
return (x2 + x1) / 2;
|
||||
}
|
||||
|
||||
|
||||
public double yCenter() {
|
||||
|
||||
return (y2 + y1) / 2;
|
||||
}
|
||||
|
||||
|
||||
public double area() {
|
||||
|
||||
return width() * height();
|
||||
}
|
||||
|
||||
|
||||
public Box scale(double scale) {
|
||||
|
||||
return new Box(x1 * scale, y1 * scale, x2 * scale, y2 * scale);
|
||||
}
|
||||
|
||||
|
||||
public boolean horizontallyAligned(Box other, double tol) {
|
||||
|
||||
return !(other.x1 - tol > x2 || other.x2 + tol < x1);
|
||||
}
|
||||
|
||||
|
||||
public double yDistanceTo(Box other) {
|
||||
|
||||
return Math.min(Math.abs(other.y1 - y2), Math.abs(y2 - other.y1));
|
||||
}
|
||||
|
||||
|
||||
public boolean intersects(Box other, double tol) {
|
||||
|
||||
return !((x2 < other.x1 - tol) || (x1 > other.x2 + tol) || (y2 < other.y1 - tol) || (y1 > other.y2 + tol));
|
||||
}
|
||||
|
||||
|
||||
public boolean intersectsAndOver(Box other, double tol) {
|
||||
|
||||
return (!((x2 < other.x1 - tol) || (x1 > other.x2 + tol) || (y2 < other.y1 - tol) || (y1 > other.y2 + tol))) && other.y1 > y1;
|
||||
}
|
||||
|
||||
|
||||
public boolean intersectsCenter(Box other, double tol) {
|
||||
|
||||
return !((x2 < other.xCenter() - tol) || (x1 > other.xCenter() + tol) || (y2 < other.yCenter() - tol) || (y1 > other.yCenter() + tol));
|
||||
}
|
||||
|
||||
|
||||
public Optional<Box> intersectRegion(Box other, double tol) {
|
||||
|
||||
if (!intersects(other, tol)) {
|
||||
return Optional.empty();
|
||||
} else {
|
||||
var overlapX1 = Math.max(x1, other.x1);
|
||||
var overlapY1 = Math.max(y1, other.y1);
|
||||
var overlapX2 = Math.min(x2, other.x2);
|
||||
var overlapY2 = Math.min(y2, other.y2);
|
||||
return Optional.of(new Box(overlapX1, overlapY1, overlapX2, overlapY2));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public double intersectArea(Box other, double tol) {
|
||||
|
||||
return intersectRegion(other, tol).map(Box::area).orElse(0d);
|
||||
}
|
||||
|
||||
|
||||
public boolean intersectsAny(List<Box> others, double tol) {
|
||||
|
||||
return others.stream().anyMatch(other -> intersects(other, tol));
|
||||
}
|
||||
|
||||
|
||||
public boolean intersectsAnyAndOver(List<Box> others, double tol) {
|
||||
|
||||
return others.stream().anyMatch(other -> intersectsAndOver(other, tol));
|
||||
}
|
||||
|
||||
|
||||
public boolean intersectsCenter(List<Box> others, double tol) {
|
||||
|
||||
return others.stream().anyMatch(other -> intersectsCenter(other, tol));
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(Box other, double tol) {
|
||||
|
||||
return (x1 <= other.x1 + tol) && (y1 <= other.y1 + tol) && (x2 >= other.x2 - tol) && (y2 >= other.y2 - tol);
|
||||
}
|
||||
|
||||
|
||||
public Box container(Box other) {
|
||||
|
||||
var minX = Math.min(x1, other.x1);
|
||||
var minY = Math.min(y1, other.y1);
|
||||
var maxX = Math.max(x2, other.x2);
|
||||
var maxY = Math.max(y2, other.y2);
|
||||
return new Box(minX, minY, maxX, maxY);
|
||||
}
|
||||
|
||||
|
||||
public Box transform(AffineTransform affineTransform) {
|
||||
|
||||
Point2D point = affineTransform.transform(new Point2D.Double(x1, y1), null);
|
||||
Point2D point2 = affineTransform.transform(new Point2D.Double(x2, y2), null);
|
||||
return new Box(Math.min(point.getX(), point2.getX()), Math.min(point.getY(), point2.getY()), Math.max(point.getX(), point2.getX()), Math.max(point.getY(), point2.getY()));
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,51 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.graphics;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
|
||||
/**
 * FIFO queue that never holds the same element twice at the same time.
 * <p>
 * An element that is already waiting in the queue is silently ignored on {@link #enqueue(Object)}.
 * Once it has been handed out by {@link #dequeue()} it may be enqueued again.
 *
 * @param <T> element type; membership is decided by {@code equals}/{@code hashCode}
 */
public class DistinctQueue<T> {

    private final Queue<T> pending = new LinkedList<>();
    private final Set<T> members = new HashSet<>();


    public DistinctQueue() {

    }


    /**
     * Appends the element to the end of the queue unless it is already queued.
     *
     * @param element the element to add
     */
    public void enqueue(T element) {

        // Set#add reports whether the element was new, which doubles as the membership test.
        if (members.add(element)) {
            pending.add(element);
        }
    }


    /**
     * Removes and returns the head of the queue.
     *
     * @return the oldest queued element, or {@code null} if the queue is empty
     */
    public T dequeue() {

        T head = pending.poll();
        if (head == null) {
            return null;
        }
        members.remove(head);
        return head;
    }


    /** @return {@code true} if no element is waiting in the queue */
    public boolean isEmpty() {

        return pending.isEmpty();
    }


    /** @return the number of elements currently waiting in the queue */
    public int size() {

        return pending.size();
    }

}
|
||||
@ -0,0 +1,172 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.graphics;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.DataBufferByte;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.rendering.ImageType;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Service
|
||||
public class FindGraphicsRaster {
|
||||
|
||||
// Pixels that are lighter then this threshold are ignored
|
||||
private final static int THRESHOLD = 240;
|
||||
|
||||
// DPI to render the image at, in practice sub-72 seems to risk pixels being lost
|
||||
private final static int DPI = 72;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public List<Box> findCCBoundingBoxes(PDDocument doc, List<Rectangle2D> remove, PageInformation pageInformation) {
|
||||
|
||||
var renderer = new PDFRenderer(doc);
|
||||
var img = renderer.renderImageWithDPI(pageInformation.number() - 1, DPI, ImageType.GRAY);
|
||||
var imageCtm = getImageCTM(pageInformation, img.getWidth());
|
||||
return findCCBoundingBoxes(img, remove, THRESHOLD, DPI / 72, imageCtm);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private List<Box> findCCBoundingBoxes(BufferedImage image, List<Rectangle2D> remove, int grayScaleTresh, int rescale, AffineTransform imageCTM) {
|
||||
|
||||
var inverseCTM = imageCTM.createInverse();
|
||||
|
||||
var h = image.getHeight();
|
||||
var w = image.getWidth();
|
||||
var pixels = new int[w * h];
|
||||
image.getRaster().getPixels(0, 0, w, h, pixels);
|
||||
remove.stream().map(rect -> inverseCTM.createTransformedShape(rect).getBounds2D()).forEach(box -> {
|
||||
for (int y = (int) Math.floor(box.getMinY() / rescale); y <= (int) Math.min(Math.ceil(box.getMaxY() / rescale), h); y++) {
|
||||
for (int x = (int) Math.floor(box.getMinX() / rescale); x <= (int) Math.min(Math.ceil(box.getMaxX() / rescale), w); x++) {
|
||||
pixels[w * y + x] = grayScaleTresh;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// var image2 = createImageFromMatrix(pixels, w, h);
|
||||
|
||||
return findCCBoundingBoxes(pixels, w, h, grayScaleTresh, rescale, imageCTM);
|
||||
}
|
||||
|
||||
|
||||
public static BufferedImage createImageFromMatrix(int[] matrix, int width, int height) {
|
||||
|
||||
BufferedImage image = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
|
||||
|
||||
byte[] pixelData = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
|
||||
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int index = y * width + x;
|
||||
int pixel = matrix[index]; // Assuming each element in the matrix represents a pixel color
|
||||
pixelData[index] = (byte) pixel;
|
||||
}
|
||||
}
|
||||
|
||||
return image;
|
||||
}
|
||||
|
||||
|
||||
private List<Box> findCCBoundingBoxes(int[] pixels, int w, int h, int pixThreshold, int rescale, AffineTransform imageCTM) {
|
||||
|
||||
DistinctQueue<Integer> pixelsToExplore = new DistinctQueue<>();
|
||||
var boundingBoxes = new ArrayList<Box>();
|
||||
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
var pixelIndex = x + y * w;
|
||||
if (pixels[pixelIndex] < pixThreshold) {
|
||||
var minX = x;
|
||||
var maxX = x;
|
||||
var minY = y;
|
||||
var maxY = y;
|
||||
pixelsToExplore.enqueue(pixelIndex);
|
||||
while (!pixelsToExplore.isEmpty()) {
|
||||
var currentPixel = pixelsToExplore.dequeue();
|
||||
if (currentPixel > w) {
|
||||
var lowerPixel = currentPixel - w;
|
||||
if (pixels[lowerPixel] < pixThreshold) {
|
||||
pixelsToExplore.enqueue(currentPixel - w);
|
||||
minY = Math.min(minY, lowerPixel / w);
|
||||
}
|
||||
}
|
||||
if (currentPixel < pixels.length - w) {
|
||||
var upperPixel = currentPixel + w;
|
||||
if (pixels[upperPixel] < pixThreshold) {
|
||||
pixelsToExplore.enqueue(upperPixel);
|
||||
maxY = Math.max(maxY, upperPixel / w);
|
||||
}
|
||||
}
|
||||
if (currentPixel % w != 0) {
|
||||
var leftPixel = currentPixel - 1;
|
||||
if (pixels[leftPixel] < pixThreshold) {
|
||||
pixelsToExplore.enqueue(leftPixel);
|
||||
minX = Math.min(minX, leftPixel % w);
|
||||
}
|
||||
}
|
||||
if ((currentPixel + 1) % w != 0) {
|
||||
var rightPixel = currentPixel + 1;
|
||||
if (pixels[rightPixel] < pixThreshold) {
|
||||
pixelsToExplore.enqueue(rightPixel + 1);
|
||||
maxX = Math.max(maxX, rightPixel % w);
|
||||
}
|
||||
}
|
||||
// Set the current pixel to white so we don't visit it again.
|
||||
pixels[currentPixel] = pixThreshold;
|
||||
}
|
||||
boundingBoxes.add(new Box(minX * rescale, minY * rescale, maxX * rescale, maxY * rescale));
|
||||
}
|
||||
}
|
||||
}
|
||||
return boundingBoxes.stream().filter(box -> box.area() > 0).map(box -> box.transform(imageCTM)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public AffineTransform getImageCTM(PageInformation pageInformation, int imageWidth) {
|
||||
|
||||
double scalingFactor = calculateScalingFactor(pageInformation, imageWidth);
|
||||
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, -pageInformation.minX(), -pageInformation.minY());
|
||||
|
||||
AffineTransform mirrorMatrix = new AffineTransform(1, 0, 0, -1, 0, pageInformation.height());
|
||||
|
||||
AffineTransform rotationMatrix = switch (pageInformation.rotationDegrees()) {
|
||||
case 90 -> new AffineTransform(0, 1, -1, 0, pageInformation.height(), 0);
|
||||
case 180 -> new AffineTransform(-1, 0, 0, -1, pageInformation.width(), pageInformation.height());
|
||||
case 270 -> new AffineTransform(0, -1, 1, 0, pageInformation.width() - pageInformation.height(), pageInformation.height()); // results from 90 + 180 rotations
|
||||
default -> new AffineTransform();
|
||||
};
|
||||
|
||||
// matrix multiplication is performed from right to left, so the order is reversed.
|
||||
// scaling -> mirror -> rotation
|
||||
AffineTransform resultMatrix = new AffineTransform();
|
||||
|
||||
resultMatrix.concatenate(rotationMatrix);
|
||||
resultMatrix.concatenate(mirrorMatrix);
|
||||
resultMatrix.concatenate(imageToCropBoxScaling);
|
||||
return resultMatrix;
|
||||
}
|
||||
|
||||
|
||||
private double calculateScalingFactor(PageInformation pageInformation, int imageWidth) {
|
||||
|
||||
// PDFBox always returns page height and width based on rotation
|
||||
double pageWidth;
|
||||
if (pageInformation.rotationDegrees() == 90 || pageInformation.rotationDegrees() == 270) {
|
||||
pageWidth = pageInformation.height();
|
||||
} else {
|
||||
pageWidth = pageInformation.width();
|
||||
}
|
||||
|
||||
return pageWidth / imageWidth;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,247 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.graphics;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.color.CMMException;
|
||||
import java.awt.geom.GeneralPath;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine;
|
||||
import org.apache.pdfbox.contentstream.PDFStreamEngine;
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
|
||||
import org.apache.pdfbox.cos.COSBase;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class GraphicBBDetector extends PDFGraphicsStreamEngine {
|
||||
|
||||
private int clipWindingRule = -1;
|
||||
private final GeneralPath linePath = new GeneralPath();
|
||||
private final List<Rectangle> bounds = new ArrayList<>();
|
||||
private final PDColor emptyPattern = new PDColor(new float[3], null);
|
||||
|
||||
|
||||
public GraphicBBDetector(PDPage page, boolean ignoreWhite) {
|
||||
|
||||
super(page);
|
||||
|
||||
if (!ignoreWhite) {
|
||||
addOperator(new NullOp("d", this));
|
||||
addOperator(new NullOp("k", this));
|
||||
addOperator(new NullOp("K", this));
|
||||
addOperator(new NullOp("g", this));
|
||||
addOperator(new NullOp("G", this));
|
||||
addOperator(new NullOp("CS", this));
|
||||
addOperator(new NullOp("cs", this));
|
||||
addOperator(new NullOp("RG", this));
|
||||
addOperator(new NullOp("rg", this));
|
||||
addOperator(new NullOp("sc", this));
|
||||
addOperator(new NullOp("SC", this));
|
||||
addOperator(new NullOp("scn", this));
|
||||
addOperator(new NullOp("SCN", this));
|
||||
}
|
||||
|
||||
// Ignore text and font ops:
|
||||
addOperator(new NullOp("Tf", this));
|
||||
addOperator(new NullOp("Tj", this));
|
||||
addOperator(new NullOp("TJ", this));
|
||||
addOperator(new NullOp("T*", this));
|
||||
addOperator(new NullOp("'", this));
|
||||
addOperator(new NullOp("\"", this));
|
||||
}
|
||||
|
||||
|
||||
public List<Box> findGraphicBB() throws IOException {
|
||||
|
||||
processPage(getPage());
|
||||
return bounds.stream().map(r -> new Box(r.x, r.y, r.x + r.width, r.y + r.height)).filter(box -> box.area() > 0).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) {
|
||||
|
||||
linePath.moveTo((float) p0.getX(), (float) p0.getY());
|
||||
linePath.lineTo((float) p1.getX(), (float) p1.getY());
|
||||
linePath.lineTo((float) p2.getX(), (float) p2.getY());
|
||||
linePath.lineTo((float) p3.getX(), (float) p3.getY());
|
||||
linePath.closePath();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void drawImage(PDImage pdImage) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void clip(int windingRule) {
|
||||
|
||||
clipWindingRule = windingRule;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void moveTo(float x, float y) {
|
||||
|
||||
linePath.moveTo(x, y);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void lineTo(float x, float y) {
|
||||
|
||||
linePath.lineTo(x, y);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) {
|
||||
|
||||
linePath.curveTo(x1, y1, x2, y2, x3, y3);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Point2D getCurrentPoint() {
|
||||
|
||||
return linePath.getCurrentPoint();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void closePath() {
|
||||
|
||||
linePath.closePath();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void endPath() {
|
||||
|
||||
if (clipWindingRule != -1) {
|
||||
linePath.setWindingRule(clipWindingRule);
|
||||
getGraphicsState().intersectClippingPath(linePath);
|
||||
clipWindingRule = -1;
|
||||
}
|
||||
linePath.reset();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void strokePath() throws IOException {
|
||||
|
||||
addLinePath(true, false);
|
||||
linePath.reset();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void fillPath(int windingRule) throws IOException {
|
||||
|
||||
linePath.setWindingRule(windingRule);
|
||||
addLinePath(false, true);
|
||||
linePath.reset();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void fillAndStrokePath(int windingRule) throws IOException {
|
||||
|
||||
linePath.setWindingRule(windingRule);
|
||||
addLinePath(true, true);
|
||||
linePath.reset();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void shadingFill(COSName shadingName) {
|
||||
|
||||
var newBound = getGraphicsState().getCurrentClippingPath().getBounds();
|
||||
if (newBound.getWidth() > 0 && newBound.getHeight() > 0) {
|
||||
bounds.add(newBound);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void addLinePath(boolean stroke, boolean fill) throws IOException {
|
||||
|
||||
var newBound = getGraphicsState().getCurrentClippingPath().getBounds().intersection(linePath.getBounds());
|
||||
if (newBound.getWidth() > 0 && newBound.getHeight() > 0) {
|
||||
|
||||
if (stroke && !getGraphicsState().getStrokingColor().isPattern() && isBlack(getGraphicsState().getStrokingColor()) || //
|
||||
!stroke && !getGraphicsState().getNonStrokingColor().isPattern() && isBlack(getGraphicsState().getNonStrokingColor())) {
|
||||
bounds.add(newBound);
|
||||
}
|
||||
|
||||
// var skipWhiteGraphic = ignoreWhite && (!stroke || isWhite(getGraphicsState().getStrokingColor())) && (!fill || isWhite(getGraphicsState().getNonStrokingColor()));
|
||||
// if (!skipWhiteGraphic) {
|
||||
// bounds.add(newBound);
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean isBlack(PDColor color) {
|
||||
|
||||
try {
|
||||
return color.toRGB() == 0;
|
||||
} catch (CMMException e) {
|
||||
// see https://github.com/haraldk/TwelveMonkeys/issues/124 or https://issues.apache.org/jira/browse/PDFBOX-3531
|
||||
// This is a quick and dirt hack
|
||||
// Happens for file 216.pdf
|
||||
log.debug(e.getMessage());
|
||||
var result = true;
|
||||
for (var component : color.getComponents()) {
|
||||
result = result && component == 0;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private boolean isWhite(PDColor color) throws IOException {
|
||||
|
||||
return !color.isPattern() && color.toRGB() == 16777215 || color.equals(emptyPattern);
|
||||
}
|
||||
|
||||
|
||||
private final class NullOp extends OperatorProcessor {
|
||||
|
||||
private final String name;
|
||||
|
||||
|
||||
private NullOp(String name, PDFStreamEngine context) {
|
||||
|
||||
super(context);
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void process(Operator operator, List<COSBase> operands) {
|
||||
// Do nothing.
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,107 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.graphics;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class GraphicExtractorService {
|
||||
|
||||
private final GraphicsClusteringService graphicsClusteringService;
|
||||
private final FindGraphicsRaster findGraphicsRaster;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public List<Box> extractPathElementGraphics(PDDocument pdDocument,
|
||||
PDPage pdPage,
|
||||
int pageNumber,
|
||||
CleanRulings cleanRulings,
|
||||
List<TextPositionSequence> textPositionSequences,
|
||||
List<Cell> emptyTableCells,
|
||||
boolean graphicsRaster) {
|
||||
|
||||
var characterBBoxes = getCharacterBBoxes(textPositionSequences);
|
||||
var tableLineBBoxes = getLineBBoxesFromTableCells(emptyTableCells);
|
||||
var underLineBBoxes = getUnderlineBBoxes(cleanRulings, characterBBoxes);
|
||||
var strikeThroughBBoxes = getStrikeThroughBBoxes(cleanRulings, characterBBoxes);
|
||||
|
||||
GraphicBBDetector graphicBBDetector = new GraphicBBDetector(pdPage, true);
|
||||
var graphicBBoxes = graphicBBDetector.findGraphicBB();
|
||||
|
||||
if (graphicsRaster) {
|
||||
// This should only be used if ocr was performed, it is currently in an early stage and needs to be improved.
|
||||
graphicBBoxes.addAll(findGraphicsRaster.findCCBoundingBoxes(pdDocument,
|
||||
characterBBoxes.stream().map(box -> new Rectangle2D.Double(box.x1 - 2, box.y1 - 2, box.width() + 4, box.height() + 4)).collect(Collectors.toList()),
|
||||
PageInformation.fromPDPage(pageNumber, pdPage)));
|
||||
}
|
||||
|
||||
var filteredGraphicBBoxes = graphicBBoxes.stream()
|
||||
.filter(box -> !box.intersectsAny(tableLineBBoxes, 4))
|
||||
.filter(box -> !box.intersectsAny(underLineBBoxes, 4))
|
||||
.filter(box -> !box.intersectsAny(strikeThroughBBoxes, 4))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
var clusters = graphicsClusteringService.getClusters(filteredGraphicBBoxes, 14);
|
||||
|
||||
return clusters.stream().filter(box -> box.area() > 500 && box.height() > 50 && box.width() > 50).toList();
|
||||
}
|
||||
|
||||
|
||||
private List<Box> getCharacterBBoxes(List<TextPositionSequence> textPositionSequences) {
|
||||
|
||||
return textPositionSequences.stream()
|
||||
.map(pos -> pos.getTextPositions()
|
||||
.stream()
|
||||
.map(tp -> SearchTextWithTextPositionFactory.mapRedTextPositionToInitialUserSpace(tp, pos))
|
||||
.collect(RectangleTransformations.collectBBox()))
|
||||
.map(Box::new)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private List<Box> getLineBBoxesFromTableCells(List<Cell> emptyTableCells) {
|
||||
|
||||
List<Box> expandedTableLines = new ArrayList<>();
|
||||
|
||||
emptyTableCells.forEach(cell -> {
|
||||
expandedTableLines.add(new Box(new Rectangle2D.Double(cell.x, cell.y - 1, cell.width, 2)));
|
||||
expandedTableLines.add(new Box(new Rectangle2D.Double(cell.x, cell.y + cell.height - 1, cell.width, 2)));
|
||||
expandedTableLines.add(new Box(new Rectangle2D.Double(cell.x - 1, cell.y, 2, cell.height)));
|
||||
expandedTableLines.add(new Box(new Rectangle2D.Double(cell.x + cell.width - 1, cell.y, 2, cell.height)));
|
||||
});
|
||||
|
||||
return expandedTableLines;
|
||||
}
|
||||
|
||||
|
||||
private List<Box> getUnderlineBBoxes(CleanRulings cleanRulings, List<Box> characterBBoxes) {
|
||||
|
||||
return cleanRulings.getHorizontal()
|
||||
.stream()
|
||||
.map(h -> new Box(h.x1, h.y1, h.x2, h.y2))
|
||||
.filter(box -> box.intersectsAnyAndOver(characterBBoxes, 6))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private List<Box> getStrikeThroughBBoxes(CleanRulings cleanRulings, List<Box> characterBBoxes) {
|
||||
|
||||
return cleanRulings.getHorizontal().stream().map(h -> new Box(h.x1, h.y1, h.x2, h.y2)).filter(box -> box.intersectsCenter(characterBBoxes, 2)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,83 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.graphics;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class GraphicsClusteringService {
|
||||
|
||||
public List<Box> getClusters(List<Box> boxes, double tol) {
|
||||
|
||||
if (boxes.isEmpty()) {
|
||||
return boxes;
|
||||
} else {
|
||||
// We iteratively pick a Box that intersects at least one other box and replace the
|
||||
// intersecting box with a Box containing them
|
||||
var currentBoxes = boxes;
|
||||
var foundIntersectingBoxes = true;
|
||||
while (foundIntersectingBoxes) {
|
||||
foundIntersectingBoxes = false;
|
||||
|
||||
// The box we are going to check to see if there are any intersecting boxes, followed by
|
||||
// any boxes that we have already check
|
||||
var checked = List.of(currentBoxes.get(0));
|
||||
var unchecked = currentBoxes.subList(1, currentBoxes.size());
|
||||
|
||||
while (!foundIntersectingBoxes && !unchecked.isEmpty()) {
|
||||
|
||||
List<Box> intersects = new ArrayList<>();
|
||||
List<Box> nonIntersects = new ArrayList<>();
|
||||
|
||||
for (Box uncheck : unchecked) {
|
||||
if (uncheck.intersects(checked.get(0), tol)) {
|
||||
intersects.add(uncheck);
|
||||
} else {
|
||||
nonIntersects.add(uncheck);
|
||||
}
|
||||
}
|
||||
|
||||
if (!intersects.isEmpty()) {
|
||||
List<Box> combinedIntersecting = new ArrayList<>();
|
||||
combinedIntersecting.add(checked.get(0));
|
||||
combinedIntersecting.addAll(intersects);
|
||||
var newBox = merge(combinedIntersecting);
|
||||
|
||||
List<Box> newCurrentBoxes = new ArrayList<>();
|
||||
newCurrentBoxes.add(newBox);
|
||||
newCurrentBoxes.addAll(checked.subList(1, checked.size()));
|
||||
newCurrentBoxes.addAll(nonIntersects);
|
||||
currentBoxes = newCurrentBoxes;
|
||||
foundIntersectingBoxes = true; // Exit this loop and re-enter the outer loop
|
||||
} else {
|
||||
List<Box> newChecked = new ArrayList<>();
|
||||
newChecked.add(unchecked.get(0));
|
||||
newChecked.addAll(checked);
|
||||
checked = newChecked;
|
||||
unchecked = unchecked.subList(1, unchecked.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
return currentBoxes;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public Box merge(List<Box> boxes) {
|
||||
|
||||
double minX = Double.POSITIVE_INFINITY;
|
||||
double minY = Double.POSITIVE_INFINITY;
|
||||
double maxX = Double.NEGATIVE_INFINITY;
|
||||
double maxY = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Box box : boxes) {
|
||||
minX = Math.min(minX, box.x1);
|
||||
minY = Math.min(minY, box.y1);
|
||||
maxX = Math.max(maxX, box.x2);
|
||||
maxY = Math.max(maxY, box.y2);
|
||||
}
|
||||
return new Box(minX, minY, maxX, maxY);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,42 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.graphics;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
|
||||
public record PageInformation(Rectangle2D mediabox, int number, int rotationDegrees) {
|
||||
|
||||
public static PageInformation fromPDPage(int pageNum, PDPage page) {
|
||||
|
||||
PDRectangle mediaBox = page.getMediaBox();
|
||||
return new PageInformation(new Rectangle2D.Double(mediaBox.getLowerLeftX(), mediaBox.getLowerLeftY(), mediaBox.getWidth(), mediaBox.getHeight()),
|
||||
pageNum,
|
||||
page.getRotation());
|
||||
}
|
||||
|
||||
|
||||
public double height() {
|
||||
|
||||
return mediabox.getHeight();
|
||||
}
|
||||
|
||||
|
||||
public double width() {
|
||||
|
||||
return mediabox.getWidth();
|
||||
}
|
||||
|
||||
|
||||
public double minX() {
|
||||
|
||||
return mediabox.getX();
|
||||
}
|
||||
|
||||
|
||||
public double minY() {
|
||||
|
||||
return mediabox.getY();
|
||||
}
|
||||
|
||||
}
|
||||
@ -27,7 +27,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
String fileName = "files/new/ScrambledTextAfterSorting.pdf";
|
||||
String fileName = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
|
||||
var documentFile = new ClassPathResource(fileName).getFile();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user