RED-4875 - call logic of new repo pdftron-logic-commons instead of local one

This commit is contained in:
Thomas Beyer 2023-03-17 10:33:48 +01:00
parent 74a094b42d
commit 143538fa40
7 changed files with 725 additions and 708 deletions

View File

@ -23,6 +23,12 @@
<groupId>com.iqser.red.commons</groupId>
<artifactId>storage-commons</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>pdftron-logic-commons</artifactId>
<version>dev_red4875_2_4dc4d</version>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>spring-commons</artifactId>

View File

@ -10,6 +10,7 @@ import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
import org.springframework.scheduling.annotation.EnableAsync;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.ocr.v1.server.multitenancy.AsyncConfig;
@ -44,4 +45,11 @@ public class Application {
return new TimedAspect(registry);
}
/**
 * Exposes the {@link InvisibleElementRemovalService} imported from
 * {@code com.iqser.red.pdftronlogic.commons} as a Spring bean so it can be injected elsewhere.
 *
 * @return a newly constructed service instance
 */
@Bean
public InvisibleElementRemovalService invisibleElementRemovalService() {
    final InvisibleElementRemovalService removalService = new InvisibleElementRemovalService();
    return removalService;
}
}

View File

@ -1,68 +1,68 @@
package com.iqser.red.service.ocr.v1.server.model;
import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE;
import java.awt.geom.Area;
import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D;
import java.util.Deque;
import java.util.LinkedList;
import com.pdftron.pdf.Rect;
import lombok.Data;
import lombok.SneakyThrows;
/**
 * Stack of clipping areas. The top entry is the clipping area currently in effect;
 * {@link #enterNewGState()} pushes a copy of it and {@link #leaveGState()} discards it again,
 * so that clipping changes made inside a nested graphics state do not leak out of it.
 */
@Data
public class ClippingPathStack {

    private Deque<Area> stack = new LinkedList<>();


    /**
     * Creates a stack whose initial clipping area is the given rectangle.
     *
     * @param rectangle rectangle used as the outermost clipping area
     */
    @SneakyThrows
    public ClippingPathStack(Rect rectangle) {

        stack.push(new Area(new Rectangle2D.Double(rectangle.getX1(), rectangle.getY1(), rectangle.getWidth(), rectangle.getHeight())));
    }


    /**
     * Restricts the current clipping area to its intersection with the given path.
     *
     * @param path the additional clipping path
     */
    @SneakyThrows
    public void intersectClippingPath(GeneralPath path) {

        getCurrentClippingPath().intersect(new Area(path));
    }


    /**
     * Checks whether the given rectangle, enlarged by {@code TOLERANCE} on every side, intersects the
     * current clipping area.
     * <p>
     * The enlargement addresses inconsistencies in the calculation of bounding boxes. It also gives
     * straight lines, whose height or width is zero even though they are rendered, a non-empty extent.
     *
     * @param x      x coordinate of the rectangle origin
     * @param y      y coordinate of the rectangle origin
     * @param width  width of the rectangle
     * @param height height of the rectangle
     * @return {@code true} if the enlarged rectangle intersects the current clipping area
     */
    public boolean almostIntersects(double x, double y, double width, double height) {

        // The origin always moves outwards (towards smaller values), independent of its sign. Adding the
        // tolerance for non-positive coordinates would move the left/bottom edge inwards instead.
        double xWithTolerance = x - TOLERANCE;
        double yWithTolerance = y - TOLERANCE;
        double widthWithTolerance = width + (2 * TOLERANCE);
        double heightWithTolerance = height + (2 * TOLERANCE);
        return getCurrentClippingPath().intersects(xWithTolerance, yWithTolerance, widthWithTolerance, heightWithTolerance);
    }


    /**
     * @return the clipping area on top of the stack, or {@code null} if the stack is empty
     */
    public Area getCurrentClippingPath() {

        return stack.peek();
    }


    /**
     * Pushes a copy of the current clipping area, so later intersections only affect the new entry.
     */
    public void enterNewGState() {

        Area current = stack.peek();
        Area cloned = new Area();
        cloned.add(current);
        stack.push(cloned);
    }


    /**
     * Discards the clipping area on top of the stack, restoring the one that was active before
     * the matching {@link #enterNewGState()} call.
     */
    public void leaveGState() {

        stack.pop();
    }

}
//package com.iqser.red.service.ocr.v1.server.model;
//
//import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE;
//
//import java.awt.geom.Area;
//import java.awt.geom.GeneralPath;
//import java.awt.geom.Rectangle2D;
//import java.util.Deque;
//import java.util.LinkedList;
//
//import com.pdftron.pdf.Rect;
//
//import lombok.Data;
//import lombok.SneakyThrows;
//
//@Data
//public class ClippingPathStack {
//
// private Deque<Area> stack = new LinkedList<>();
//
//
// @SneakyThrows
// public ClippingPathStack(Rect rectangle) {
//
// stack.push(new Area(new Rectangle2D.Double(rectangle.getX1(), rectangle.getY1(), rectangle.getWidth(), rectangle.getHeight()).getBounds2D()));
// }
//
//
// @SneakyThrows
// public void intersectClippingPath(GeneralPath path) {
//
// getCurrentClippingPath().intersect(new Area(path));
// }
//
//
// public boolean almostIntersects(double x, double y, double width, double height) {
// // To address inconsistencies in the calculation of the bounding box we slightly increase the rectangle
// // Height or width are zero for straight lines, even though they are being rendered. Therefore, height or width must be at minimum >0.
//
// double x_with_tolerance = x > 0 ? x - TOLERANCE : x + TOLERANCE;
// double y_with_tolerance = y > 0 ? y - TOLERANCE : y + TOLERANCE;
// double width_with_tolerance = width + (2 * TOLERANCE);
// double height_with_tolerance = height + (2 * TOLERANCE);
// return getCurrentClippingPath().intersects(x_with_tolerance, y_with_tolerance, width_with_tolerance, height_with_tolerance);
// }
//
//
// public Area getCurrentClippingPath() {
//
// return stack.peek();
// }
//
//
// public void enterNewGState() {
//
// Area current = stack.peek();
// Area cloned = new Area();
// cloned.add(current);
// stack.push(cloned);
// }
//
//
// public void leaveGState() {
//
// stack.pop();
// }
//
//}

View File

@ -1,170 +1,170 @@
package com.iqser.red.service.ocr.v1.server.model;
import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE;
import java.awt.geom.Rectangle2D;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.Rect;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Snapshot of selected properties of a PDFTron {@link Element}. A snapshot can later be compared
 * against another element via {@link #almostMatches(Element)}; numeric values are compared with
 * the {@code TOLERANCE} constant rather than exactly.
 */
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ElementFeatures {

    int elementType;
    Rectangle2D boundingBox;


    /**
     * Builds the snapshot matching the type of the given element.
     *
     * @param element path, text, image or inline image element
     * @return the extracted features
     * @throws PDFNetException  if reading a property from the element fails
     * @throws RuntimeException if the element type is not one of the supported types
     */
    public static ElementFeatures extractFeatures(Element element) throws PDFNetException {

        switch (element.getType()) {
            case Element.e_path:
                return pathFeatures(element);
            case Element.e_text:
                return textFeatures(element);
            case Element.e_image:
            case Element.e_inline_image:
                return imageFeatures(element);
            default:
                // This technically should never happen, it's a safetynet
                throw new RuntimeException("Feature Extraction is not supported for PDFTron.Element with type: " + element.getType());
        }
    }


    /** Captures the type, bounding box and clip/stroke/fill flags of a path element. */
    private static ElementFeatures pathFeatures(Element element) throws PDFNetException {

        return Path.builder()
                .elementType(element.getType())
                .boundingBox(toRectangle2D(element.getBBox()))
                .isClippingPath(element.isClippingPath())
                .isClipWindingFill(element.isClipWindingFill())
                .isStroked(element.isStroked())
                .isFilled(element.isFilled())
                .isWindingFill(element.isWindingFill())
                .build();
    }


    /** Captures the type, bounding box, text string, font type and font size of a text element. */
    private static ElementFeatures textFeatures(Element element) throws PDFNetException {

        return Text.builder()
                .elementType(element.getType())
                .boundingBox(toRectangle2D(element.getBBox()))
                .text(element.getTextString())
                .font(element.getGState().getFont().getType())
                .fontsize(element.getGState().getFontSize())
                .build();
    }


    /** Captures the type, bounding box and image metadata of an image or inline image element. */
    private static ElementFeatures imageFeatures(Element element) throws PDFNetException {

        return Image.builder()
                .elementType(element.getType())
                .boundingBox(toRectangle2D(element.getBBox()))
                .dataSize(element.getImageDataSize())
                .height(element.getImageHeight())
                .width(element.getImageWidth())
                .renderingIntent(element.getImageRenderingIntent())
                .componentNum(element.getComponentNum())
                .bitsPerComponent(element.getBitsPerComponent())
                .build();
    }


    /** Converts a PDFTron rectangle into a {@link Rectangle2D} using its x1/y1 corner, width and height. */
    private static Rectangle2D toRectangle2D(Rect rect) throws PDFNetException {

        double originX = rect.getX1();
        double originY = rect.getY1();
        return new Rectangle2D.Double(originX, originY, rect.getWidth(), rect.getHeight());
    }


    /**
     * Checks whether the element has the recorded type and a bounding box that equals the recorded
     * one within the tolerance.
     *
     * @param element the element to compare against this snapshot
     * @return {@code true} if type and bounding box match
     * @throws PDFNetException if reading a property from the element fails
     */
    public boolean almostMatches(Element element) throws PDFNetException {

        if (element.getType() != elementType) {
            return false;
        }
        Rect candidateBox = element.getBBox();
        if (candidateBox == null) {
            return false;
        }
        return rectsAlmostMatch(candidateBox);
    }


    /** Returns {@code true} if the two values differ by less than {@code TOLERANCE}. */
    protected boolean almostEqual(double a, double b) {

        double difference = a - b;
        return Math.abs(difference) < TOLERANCE;
    }


    /** Compares origin and size of the given box with the recorded bounding box. */
    @SneakyThrows
    private boolean rectsAlmostMatch(Rect bBox) {
        // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance

        if (!almostEqual(bBox.getX1(), boundingBox.getX())) {
            return false;
        }
        if (!almostEqual(bBox.getY1(), boundingBox.getY())) {
            return false;
        }
        if (!almostEqual(bBox.getWidth(), boundingBox.getWidth())) {
            return false;
        }
        return almostEqual(bBox.getHeight(), boundingBox.getHeight());
    }


    /** Features of a text element: additionally compares text string, font type and font size. */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    private static class Text extends ElementFeatures {

        String text;
        int font;
        double fontsize;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            if (!super.almostMatches(element)) {
                return false;
            }
            if (!text.equals(element.getTextString())) {
                return false;
            }
            if (font != element.getGState().getFont().getType()) {
                return false;
            }
            return almostEqual(fontsize, element.getGState().getFontSize());
        }

    }

    /** Features of a path element: additionally compares the clip, stroke and fill flags. */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    private static class Path extends ElementFeatures {

        boolean isClippingPath;
        boolean isClipWindingFill;
        boolean isStroked;
        boolean isFilled;
        boolean isWindingFill;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            if (!super.almostMatches(element)) {
                return false;
            }
            if (isClippingPath != element.isClippingPath()) {
                return false;
            }
            if (isClipWindingFill != element.isClipWindingFill()) {
                return false;
            }
            if (isStroked != element.isStroked()) {
                return false;
            }
            if (isFilled != element.isFilled()) {
                return false;
            }
            return isWindingFill == element.isWindingFill();
        }

    }

    /** Features of an image element: additionally compares the recorded image metadata. */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    private static class Image extends ElementFeatures {

        int dataSize;
        int height;
        int width;
        int renderingIntent;
        int componentNum;
        int bitsPerComponent;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            if (!super.almostMatches(element)) {
                return false;
            }
            if (dataSize != element.getImageDataSize()) {
                return false;
            }
            if (height != element.getImageHeight()) {
                return false;
            }
            if (width != element.getImageWidth()) {
                return false;
            }
            if (renderingIntent != element.getImageRenderingIntent()) {
                return false;
            }
            if (componentNum != element.getComponentNum()) {
                return false;
            }
            return bitsPerComponent == element.getBitsPerComponent();
        }

    }

}
//package com.iqser.red.service.ocr.v1.server.model;
//
//import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE;
//
//import java.awt.geom.Rectangle2D;
//
//import com.pdftron.common.PDFNetException;
//import com.pdftron.pdf.Element;
//import com.pdftron.pdf.Rect;
//
//import lombok.AccessLevel;
//import lombok.EqualsAndHashCode;
//import lombok.Getter;
//import lombok.SneakyThrows;
//import lombok.experimental.FieldDefaults;
//import lombok.experimental.SuperBuilder;
//
//@Getter
//@SuperBuilder
//@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
//public class ElementFeatures {
//
// int elementType;
// Rectangle2D boundingBox;
//
//
// public boolean almostMatches(Element element) throws PDFNetException {
//
// return element.getType() == elementType && //
// element.getBBox() != null && //
// rectsAlmostMatch(element.getBBox());
// }
//
//
// protected boolean almostEqual(double a, double b) {
//
// return Math.abs(a - b) < TOLERANCE;
// }
//
//
// @SneakyThrows
// private boolean rectsAlmostMatch(Rect bBox) {
// // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
//
// return almostEqual(bBox.getX1(), boundingBox.getX()) && //
// almostEqual(bBox.getY1(), boundingBox.getY()) && //
// almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
// almostEqual(bBox.getHeight(), boundingBox.getHeight());
// }
//
//
// @EqualsAndHashCode(callSuper = true)
// @Getter
// @SuperBuilder
// @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
// private static class Text extends ElementFeatures {
//
// String text;
// int font;
// double fontsize;
//
//
// @Override
// public boolean almostMatches(Element element) throws PDFNetException {
//
// return super.almostMatches(element) && //
// text.equals(element.getTextString()) && //
// font == element.getGState().getFont().getType() && //
// almostEqual(fontsize, element.getGState().getFontSize());
// }
//
// }
//
// @EqualsAndHashCode(callSuper = true)
// @Getter
// @SuperBuilder
// @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
// private static class Path extends ElementFeatures {
//
// boolean isClippingPath;
// boolean isClipWindingFill;
// boolean isStroked;
// boolean isFilled;
// boolean isWindingFill;
//
//
// @Override
// public boolean almostMatches(Element element) throws PDFNetException {
//
// return super.almostMatches(element) && //
// isClippingPath == element.isClippingPath() && //
// isClipWindingFill == element.isClipWindingFill() && //
// isStroked == element.isStroked() && //
// isFilled == element.isFilled() && //
// isWindingFill == element.isWindingFill();
//
// }
//
// }
//
// @EqualsAndHashCode(callSuper = true)
// @Getter
// @SuperBuilder
// @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
// private static class Image extends ElementFeatures {
//
// int dataSize;
// int height;
// int width;
// int renderingIntent;
// int componentNum;
// int bitsPerComponent;
//
//
// @Override
// public boolean almostMatches(Element element) throws PDFNetException {
//
// return super.almostMatches(element) && //
// dataSize == element.getImageDataSize() && //
// height == element.getImageHeight() && //
// width == element.getImageWidth() && //
// renderingIntent == element.getImageRenderingIntent() && //
// componentNum == element.getComponentNum() && //
// bitsPerComponent == element.getBitsPerComponent();
// }
//
// }
//
//
// public static ElementFeatures extractFeatures(Element element) throws PDFNetException {
//
// return switch (element.getType()) {
// case Element.e_path -> Path.builder()
// .elementType(element.getType())
// .boundingBox(toRectangle2D(element.getBBox()))
// .isClippingPath(element.isClippingPath())
// .isClipWindingFill(element.isClipWindingFill())
// .isStroked(element.isStroked())
// .isFilled(element.isFilled())
// .isWindingFill(element.isWindingFill())
// .build();
// case Element.e_text -> Text.builder()
// .elementType(element.getType())
// .boundingBox(toRectangle2D(element.getBBox()))
// .text(element.getTextString())
// .font(element.getGState().getFont().getType())
// .fontsize(element.getGState().getFontSize())
// .build();
// case Element.e_image, Element.e_inline_image -> Image.builder()
// .elementType(element.getType())
// .boundingBox(toRectangle2D(element.getBBox()))
// .dataSize(element.getImageDataSize())
// .height(element.getImageHeight())
// .width(element.getImageWidth())
// .renderingIntent(element.getImageRenderingIntent())
// .componentNum(element.getComponentNum())
// .bitsPerComponent(element.getBitsPerComponent())
// .build();
// // This technically should never happen, it's a safetynet
// default -> throw new RuntimeException("Feature Extraction is not supported for PDFTron.Element with type: " + element.getType());
// };
// }
//
//
// private static Rectangle2D toRectangle2D(Rect rect) throws PDFNetException {
//
// return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
// }
//
//}

View File

@ -1,466 +1,466 @@
package com.iqser.red.service.ocr.v1.server.service;
import java.awt.Shape;
import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.springframework.stereotype.Service;
import com.google.common.primitives.Bytes;
import com.google.common.primitives.Doubles;
import com.iqser.red.service.ocr.v1.server.model.ClippingPathStack;
import com.iqser.red.service.ocr.v1.server.model.ElementFeatures;
import com.pdftron.common.Matrix2D;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.ColorPt;
import com.pdftron.pdf.ColorSpace;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.GState;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator;
import com.pdftron.pdf.PathData;
import com.pdftron.pdf.Rect;
import com.pdftron.sdf.Obj;
import com.pdftron.sdf.SDFDoc;
import lombok.Builder;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
public class InvisibleElementRemovalService {
// Absolute tolerance applied when coordinates and sizes of bounding boxes are compared or when
// rectangles are shrunk before a containment test (see almostContains).
// NOTE(review): presumably expressed in PDF user-space units — confirm.
static public final double TOLERANCE = 1e-3;
/**
 * Removes all hidden Text, Path and Image Elements from a PDF Document.
 * handled cases:
 * -Text which is transparent or is set to not render
 * -Elements outside of clipping path
 * -Elements that have been painted over by visible and filled Paths
 * unhandled cases:
 * -Elements covered by widely stroked path
 * -Elements with the same color as background
 * -Any Text set to clipping with its many interactions with other elements
 * <p>
 * Every page is processed in two passes: the first removes clipped elements and invisible text and
 * remembers which elements are painted over, the second removes those overlapped elements.
 * The native PDFTron resources are released even if processing or saving fails.
 *
 * @param pdfFile The PDF file to process
 * @param out     OutputStream to write the resulting file to
 * @param delta   If this flag is set only the removed Elements will be written to the output file.
 *                The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap.
 **/
@SneakyThrows
public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {

    PDFDoc pdfDoc = new PDFDoc(pdfFile);
    try {
        ElementWriter writer = new ElementWriter();
        try {
            ElementReader reader = new ElementReader();
            try {
                Set<Long> visitedXObjIds = new TreeSet<>();

                for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {

                    Page page = iterator.next();

                    visitedXObjIds.add(page.getSDFObj().getObjNum());

                    InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
                            .reader(reader)
                            .clippingPathStack(new ClippingPathStack(page.getMediaBox()))
                            .delta(delta)
                            .overlappedElements(new ArrayList<>())
                            .visibleElements(new ArrayList<>())
                            .visitedXObjIds(visitedXObjIds)
                            .build();

                    removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);

                    // The second pass has to descend into the same form XObjects again.
                    context.visitedXObjIds().clear();

                    removeOverlappedElements(page, writer, context);
                }

                try {
                    pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
                } catch (Exception e) {
                    // Pass the exception to the logger so the cause is not lost in the log output.
                    log.error("File could not be saved after invisible element removal", e);
                    throw new RuntimeException(e);
                }
            } finally {
                reader.destroy();
            }
        } finally {
            writer.destroy();
        }
    } finally {
        pdfDoc.close();
    }
}
/**
 * First pass over a page: reads the page content with the context's reader and rewrites it through
 * {@code writer} in replacement mode, delegating the per-element decisions to {@link #processElements}.
 *
 * @param page    the page whose content is rewritten
 * @param writer  writer used to emit the retained elements
 * @param context per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(Page page,
                                                                                ElementWriter writer,
                                                                                InvisibleElementRemovalContext context) throws PDFNetException {

    ElementReader pageReader = context.reader();

    pageReader.begin(page);
    writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());

    processElements(writer, context);

    writer.end();
    pageReader.end();
}
/**
 * Consumes all remaining elements of the reader's current content stream and dispatches each one
 * by type. Group begin/end elements additionally push/pop the clipping path stack; element types
 * without dedicated handling are written through unchanged.
 *
 * @param writer  writer receiving the retained elements
 * @param context per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void processElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    Element current = context.reader().next();
    while (current != null) {
        switch (current.getType()) {
            case Element.e_image, Element.e_inline_image -> processImages(current, writer, context);
            case Element.e_text -> processText(current, writer, context);
            case Element.e_path -> processPath(current, writer, context);
            case Element.e_form -> processForm(current, writer, context);
            case Element.e_group_begin -> {
                context.clippingPathStack().enterNewGState();
                writer.writeElement(current);
            }
            case Element.e_group_end -> {
                context.clippingPathStack().leaveGState();
                writer.writeElement(current);
            }
            default -> writer.writeElement(current);
        }
        current = context.reader().next();
    }
}
/**
 * Handles an image or inline image element. In normal mode the image is written and remembered as
 * visible only if its bounding box touches the current clipping area; in delta mode it is written
 * only if it does not. An image without a bounding box is neither written nor remembered.
 *
 * @param imageElement the image element read from the content stream
 * @param writer       writer receiving the retained elements
 * @param context      per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void processImages(Element imageElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    Rect bBox = imageElement.getBBox();
    if (bBox == null) {
        return;
    }

    boolean insideClip = context.clippingPathStack().almostIntersects(bBox.getX1(), bBox.getY1(), bBox.getWidth(), bBox.getHeight());
    boolean deltaMode = context.delta();

    if (insideClip && !deltaMode) {
        context.visibleElements().add(ElementFeatures.extractFeatures(imageElement));
    }

    // normal mode keeps what is inside the clip, delta mode keeps what is outside of it
    if (deltaMode != insideClip) {
        writer.writeElement(imageElement);
    }
}
/**
 * Handles a text element. Text without a bounding box is written unchanged. Text that is inside the
 * clipping area and rendered visibly is remembered as visible. In normal mode only such text is
 * written; in delta mode only the removed text is written, recoloured by removal reason.
 *
 * @param textElement the text element read from the content stream
 * @param writer      writer receiving the retained elements
 * @param context     per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    Rect bBox = textElement.getBBox();
    if (bBox == null) {
        writer.writeElement(textElement);
        return;
    }

    GState gState = textElement.getGState();

    boolean inClippingPath = context.clippingPathStack().almostIntersects(bBox.getX1(), bBox.getY1(), bBox.getWidth(), bBox.getHeight());
    boolean isTextVisible = isTextRenderedVisibly(gState);
    boolean rendered = inClippingPath && isTextVisible;

    if (rendered) {
        context.visibleElements().add(ElementFeatures.extractFeatures(textElement));
    }

    if (context.delta()) {
        if (!inClippingPath) {
            gState.setFillColorSpace(ColorSpace.createDeviceRGB());
            // red for elements removed by clipping path
            gState.setFillColor(new ColorPt(1, 0, 0));
            writer.writeElement(textElement);
        }
        if (!isTextVisible) {
            gState.setFillColorSpace(ColorSpace.createDeviceRGB());
            // blue for elements removed due to transparency or not rendered
            gState.setFillColor(new ColorPt(0, 0, 1));
            gState.setTextRenderMode(GState.e_fill_text);
            gState.setFillOpacity(1);
            writer.writeElement(textElement);
        }
        return;
    }

    if (rendered) {
        writer.writeElement(textElement);
    } else if (textElement.hasTextMatrix()) {
        /*
         A PDFTron Element of type "text" corresponds to a Tj command. When a Tm command directly precedes it,
         PDFTron merges both into one Element; hasTextMatrix() detects exactly that situation.
         Tm positions the whole BT/ET segment, which may contain further Tj commands, so dropping the
         Element entirely would shift the text that follows. Hence only the Tm part is written:
         */
        writer.writeGStateChanges(textElement);
    }
}
/**
 * Writes the form element and, unless its XObject was already visited, descends into the form's
 * content stream and processes its elements with a dedicated writer.
 *
 * @param formElement the form element read from the content stream
 * @param writer      writer of the enclosing content stream
 * @param context     per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void processForm(Element formElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    writer.writeElement(formElement);
    Obj formObj = formElement.getXObject();

    // add() reports whether the id was new, which replaces the separate contains() check
    boolean firstVisit = context.visitedXObjIds().add(formObj.getObjNum());
    if (!firstVisit) {
        return;
    }

    ElementReader formReader = context.reader();
    // writer needs to be newly initialized when entering a new content stream
    // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
    ElementWriter formWriter = new ElementWriter();
    formReader.formBegin();
    formWriter.begin(formObj);

    formReader.clearChangeList();
    formWriter.setDefaultGState(formReader);

    processElements(formWriter, context);
    formWriter.end();
    formWriter.destroy();
    formReader.end();
}
/**
 * Handles a path element.
 * <ul>
 * <li>A path without operators and points only has its graphics state changes written.</li>
 * <li>A clipping path narrows the current clipping area and is written; in delta mode its clip flag is cleared.</li>
 * <li>A painted path inside the clipping area is remembered as visible and, in normal mode, written.
 * If it is filled and fully opaque, previously visible elements it covers are moved to the overlapped list.</li>
 * <li>A painted path outside the clipping area is dropped, or written in red in delta mode.</li>
 * </ul>
 *
 * @param pathElement the path element read from the content stream
 * @param writer      writer receiving the retained elements
 * @param context     per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void processPath(Element pathElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    PathData pathData = pathElement.getPathData();

    if (pathData.getOperators().length == 0 && pathData.getPoints().length == 0) {
        writer.writeGStateChanges(pathElement);
        return;
    }

    GeneralPath linePath = convertToGeneralPath(pathData);

    //transform path to initial user space
    linePath.transform(toAffineTransform(pathElement.getCTM()));

    // visibility is judged against the clipping area in effect before this path is applied
    Rectangle2D bounds = linePath.getBounds2D();
    boolean inClippingPath = context.clippingPathStack().almostIntersects(bounds.getX(), bounds.getY(), bounds.getWidth(), bounds.getHeight());

    if (pathElement.isClippingPath()) {
        linePath.setWindingRule(pathElement.isClipWindingFill() ? GeneralPath.WIND_NON_ZERO : GeneralPath.WIND_EVEN_ODD);

        context.clippingPathStack().intersectClippingPath(linePath);
        pathElement.setPathClip(!context.delta());
        writer.writeElement(pathElement);
        return;
    }

    linePath.setWindingRule(pathElement.isWindingFill() ? GeneralPath.WIND_NON_ZERO : GeneralPath.WIND_EVEN_ODD);

    if (!inClippingPath) {
        if (context.delta()) {
            GState gState = pathElement.getGState();
            gState.setFillColorSpace(ColorSpace.createDeviceRGB());
            gState.setFillColor(new ColorPt(1, 0, 0));
            gState.setStrokeColorSpace(ColorSpace.createDeviceRGB());
            gState.setStrokeColor(new ColorPt(1, 0, 0));
            writer.writeElement(pathElement);
        }
        return;
    }

    if (isFilledAndNonTransparent(pathElement)) {
        List<ElementFeatures> coveredElements = context.visibleElements()
                .stream()
                .filter(features -> almostContains(linePath, features.getBoundingBox()))
                .toList();
        context.overlappedElements().addAll(coveredElements);
        context.visibleElements().removeAll(coveredElements);
    }
    context.visibleElements().add(ElementFeatures.extractFeatures(pathElement));
    if (!context.delta()) {
        writer.writeElement(pathElement);
    }
}
/**
 * Second pass over a page: rewrites the page content while leaving out the elements remembered as
 * overlapped. In delta mode a green bounding box is drawn for each overlapped element instead and
 * the list is cleared, so nothing is removed. Logs a warning if remembered elements are left over.
 *
 * @param page    the page whose content is rewritten
 * @param writer  writer used to emit the retained elements
 * @param context per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void removeOverlappedElements(Page page, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    ElementReader pageReader = context.reader();
    List<ElementFeatures> overlapped = context.overlappedElements();

    pageReader.begin(page);
    writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());

    if (context.delta()) {
        // green for element removed due to overlapping
        for (ElementFeatures feature : overlapped) {
            drawBBox(writer, feature.getBoundingBox(), "#00FF00");
        }
        overlapped.clear();
    }
    processOverlappedElements(writer, context);

    writer.end();
    pageReader.end();

    if (!overlapped.isEmpty()) {
        log.warn(overlapped.size() + " overlapped elements have not been found or removed");
    }
}
/**
 * Consumes all remaining elements of the reader's current content stream. Path, image and text
 * elements that match one of the remembered overlapped elements are left out (the matching entry
 * is consumed); forms are descended into; everything else is written unchanged.
 *
 * @param writer  writer receiving the retained elements
 * @param context per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void processOverlappedElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    for (Element element = context.reader().next(); element != null; element = context.reader().next()) {
        switch (element.getType()) {
            case Element.e_form -> processFormOverlappedElements(writer, element, context);
            case Element.e_path, Element.e_image, Element.e_inline_image, Element.e_text -> {
                boolean anyMatch = removeFirstMatchingFeatures(context.overlappedElements(), element);
                if (!anyMatch) {
                    writer.writeElement(element);
                } else if (element.getType() == Element.e_text && element.hasTextMatrix()) {
                    /*
                     PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
                     hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
                     Therefore, the position of a following Tj is affected by not writing the first Element.
                     This is why, we write only the Tm command:
                     */
                    writer.writeGStateChanges(element);
                }
            }
            default -> writer.writeElement(element);
        }
    }
}


/**
 * Removes the first entry of {@code candidates} that almost matches the given element.
 * Removal goes through the iterator so the list is never modified behind an active iteration.
 *
 * @return {@code true} if an entry matched and was removed
 */
private boolean removeFirstMatchingFeatures(List<ElementFeatures> candidates, Element element) throws PDFNetException {

    for (Iterator<ElementFeatures> iterator = candidates.iterator(); iterator.hasNext(); ) {
        if (iterator.next().almostMatches(element)) {
            iterator.remove();
            return true;
        }
    }
    return false;
}
/**
 * Writes the form element and, unless its XObject was already visited in this pass, descends into
 * the form's content stream to remove overlapped elements there with a dedicated writer.
 *
 * @param writer      writer of the enclosing content stream
 * @param formElement the form element read from the content stream
 * @param context     per-page processing state
 * @throws PDFNetException if PDFTron fails while reading or writing
 */
private void processFormOverlappedElements(ElementWriter writer, Element formElement, InvisibleElementRemovalContext context) throws PDFNetException {

    writer.writeElement(formElement);
    Obj formObj = formElement.getXObject();

    // add() reports whether the id was new, which replaces the separate contains() check
    boolean firstVisit = context.visitedXObjIds().add(formObj.getObjNum());
    if (!firstVisit) {
        return;
    }

    ElementReader formReader = context.reader();
    // writer needs to be newly initialized when entering a new content stream
    // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
    ElementWriter formWriter = new ElementWriter();
    formReader.formBegin();
    formWriter.begin(formObj);

    formReader.clearChangeList();
    formWriter.setDefaultGState(formReader);

    processOverlappedElements(formWriter, context);
    formWriter.end();
    formWriter.destroy();
    formReader.end();
}
/**
 * Decides from the graphics state whether text is painted visibly. Text counts as invisible when
 * the render mode is "invisible", or when every paint operation of a fill, stroke or fill+stroke
 * mode has an opacity of zero. All other render modes are treated as visible.
 *
 * @param gState graphics state of the text element
 * @return {@code true} if the text is considered visible
 * @throws PDFNetException if reading the graphics state fails
 */
private boolean isTextRenderedVisibly(GState gState) throws PDFNetException {

    int renderMode = gState.getTextRenderMode();

    if (renderMode == GState.e_invisible_text) {
        return false;
    }
    if (renderMode == GState.e_fill_text) {
        return !(gState.getFillOpacity() == 0);
    }
    if (renderMode == GState.e_stroke_text) {
        return !(gState.getStrokeOpacity() == 0);
    }
    if (renderMode == GState.e_fill_stroke_text) {
        return !(gState.getFillOpacity() == 0 && gState.getStrokeOpacity() == 0);
    }
    return true;
}
/**
 * Translates PDFTron path data into a {@link GeneralPath}. Operators are replayed in order, each
 * consuming its coordinates from the point list; a rectangle operator is expanded into a closed
 * four-sided sub-path.
 *
 * @param pathData operators and points of a path element
 * @return the equivalent path, still in the coordinate system of the path data
 * @throws PDFNetException if an operator other than moveto, lineto, cubicto, closepath or rect is encountered
 */
private GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {

    GeneralPath result = new GeneralPath();

    Iterator<Double> coordinates = Doubles.asList(pathData.getPoints()).iterator();

    for (byte operator : pathData.getOperators()) {
        switch (operator) {
            case PathData.e_moveto -> result.moveTo(coordinates.next(), coordinates.next());
            case PathData.e_lineto -> result.lineTo(coordinates.next(), coordinates.next());
            case PathData.e_cubicto -> result.curveTo(coordinates.next(), coordinates.next(), coordinates.next(), coordinates.next(), coordinates.next(), coordinates.next());
            case PathData.e_closepath -> result.closePath();
            case PathData.e_rect -> {
                double left = coordinates.next();
                double bottom = coordinates.next();
                double right = left + coordinates.next();
                double top = bottom + coordinates.next();
                result.moveTo(left, bottom);
                result.lineTo(right, bottom);
                result.lineTo(right, top);
                result.lineTo(left, top);
                result.closePath();
            }
            default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
        }
    }
    return result;
}
/**
 * Checks whether {@code outer} contains {@code inner} after {@code inner} has been shrunk by
 * {@code TOLERANCE} on every side. The shrinking addresses inconsistencies in the calculation of
 * bounding boxes.
 * <p>
 * A rectangle narrower or lower than twice the tolerance ends up with a non-positive size.
 * NOTE(review): {@code java.awt.geom.Path2D} reports such empty rectangles as not contained — confirm
 * this is the desired outcome for very thin elements.
 *
 * @param outer the covering shape
 * @param inner bounding box of the possibly covered element
 * @return {@code true} if the shrunk rectangle lies entirely inside {@code outer}
 */
private boolean almostContains(Shape outer, Rectangle2D inner) {

    // The origin always moves inwards (towards larger values), independent of its sign. Subtracting the
    // tolerance for negative coordinates would push the left/bottom edge outwards instead of shrinking.
    double xWithTolerance = inner.getX() + TOLERANCE;
    double yWithTolerance = inner.getY() + TOLERANCE;
    double widthWithTolerance = inner.getWidth() - (2 * TOLERANCE);
    double heightWithTolerance = inner.getHeight() - (2 * TOLERANCE);

    Rectangle2D innerRect = new Rectangle2D.Double(xWithTolerance, yWithTolerance, widthWithTolerance, heightWithTolerance);

    return outer.contains(innerRect);
}
/**
 * Tells whether the element is filled with a fill opacity of exactly 1.
 *
 * @param element the element to inspect
 * @return {@code true} if the element is filled and its fill is fully opaque
 * @throws PDFNetException if reading the element fails
 */
private boolean isFilledAndNonTransparent(Element element) throws PDFNetException {

    if (!element.isFilled()) {
        return false;
    }
    double fillOpacity = element.getGState().getFillOpacity();
    return fillOpacity == 1;
}
/**
 * Draws the outline of a rectangle in the given colour.
 * The native colour and builder objects are released even if building or writing fails.
 *
 * @param writer   writer the rectangle is placed with
 * @param r        rectangle to outline
 * @param hexcolor colour in the form {@code #RRGGBB}
 */
@SneakyThrows
private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {

    // each colour channel is a two-digit hex value scaled to the range 0..1
    ColorPt colorPt = new ColorPt(Integer.parseInt(hexcolor.substring(1, 3), 16) / 255d,
            Integer.parseInt(hexcolor.substring(3, 5), 16) / 255d,
            Integer.parseInt(hexcolor.substring(5, 7), 16) / 255d);
    try {
        ElementBuilder eb = new ElementBuilder();
        try {
            Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
            rect.setPathStroke(true);
            rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
            rect.getGState().setStrokeColor(colorPt);
            writer.writePlacedElement(rect);
        } finally {
            eb.destroy();
        }
    } finally {
        colorPt.destroy();
    }
}
/**
 * Converts a PDFTron matrix into the equivalent {@link AffineTransform}. The components a, b, c, d
 * become the linear part and h, v the translation.
 *
 * @param ctm the matrix to convert
 * @return the corresponding affine transform
 * @throws PDFNetException if reading a matrix component fails
 */
private static AffineTransform toAffineTransform(Matrix2D ctm) throws PDFNetException {

    double m00 = ctm.getA();
    double m10 = ctm.getB();
    double m01 = ctm.getC();
    double m11 = ctm.getD();
    double translateX = ctm.getH();
    double translateY = ctm.getV();
    return new AffineTransform(m00, m10, m01, m11, translateX, translateY);
}
/**
 * State shared by the processing steps of a single page.
 *
 * @param delta              if set, only removed elements are written (see removeInvisibleElements)
 * @param reader             reader used to iterate the page content and nested form content
 * @param clippingPathStack  clipping areas currently in effect, starting with the page's media box
 * @param overlappedElements features of elements found to be covered by a filled, opaque path;
 *                           entries are consumed when the matching element is removed
 * @param visibleElements    features of elements considered visible so far
 * @param visitedXObjIds     object numbers already visited, used to process each form XObject only once per pass
 */
@Builder
private record InvisibleElementRemovalContext(
boolean delta,
ElementReader reader,
ClippingPathStack clippingPathStack,
List<ElementFeatures> overlappedElements,
List<ElementFeatures> visibleElements,
Set<Long> visitedXObjIds) {
}
}
//package com.iqser.red.service.ocr.v1.server.service;
//
//import java.awt.Shape;
//import java.awt.geom.AffineTransform;
//import java.awt.geom.GeneralPath;
//import java.awt.geom.Rectangle2D;
//import java.io.InputStream;
//import java.io.OutputStream;
//import java.util.ArrayList;
//import java.util.Iterator;
//import java.util.List;
//import java.util.Set;
//import java.util.TreeSet;
//
//import org.springframework.stereotype.Service;
//
//import com.google.common.primitives.Bytes;
//import com.google.common.primitives.Doubles;
//import com.iqser.red.service.ocr.v1.server.model.ClippingPathStack;
//import com.iqser.red.service.ocr.v1.server.model.ElementFeatures;
//import com.pdftron.common.Matrix2D;
//import com.pdftron.common.PDFNetException;
//import com.pdftron.pdf.ColorPt;
//import com.pdftron.pdf.ColorSpace;
//import com.pdftron.pdf.Element;
//import com.pdftron.pdf.ElementBuilder;
//import com.pdftron.pdf.ElementReader;
//import com.pdftron.pdf.ElementWriter;
//import com.pdftron.pdf.GState;
//import com.pdftron.pdf.PDFDoc;
//import com.pdftron.pdf.Page;
//import com.pdftron.pdf.PageIterator;
//import com.pdftron.pdf.PathData;
//import com.pdftron.pdf.Rect;
//import com.pdftron.sdf.Obj;
//import com.pdftron.sdf.SDFDoc;
//
//import lombok.Builder;
//import lombok.SneakyThrows;
//import lombok.extern.slf4j.Slf4j;
//
//@Slf4j
//@Service
//public class InvisibleElementRemovalService {
//
// static public final double TOLERANCE = 1e-3;
//
//
// /**
// * Removes all hidden Text, Path and Image Elements from a PDF Document.
// * handled cases:
// * -Text which is transparent or is set to not render
// * -Elements outside of clipping path
// * -Elements that have been painted over by visible and filled Paths
// * unhandled cases:
// * -Elements covered by widely stroked path
// * -Elements with the same color as background
// * -Any Text set to clipping with its many interactions with other elements
// *
// * @param pdfFile The PDF file to process
// * @param delta If this flag is set only the removed Elements will be written to the output file.
// * The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap.
// * @param out OutputStream to write the resulting file to
// **/
// @SneakyThrows
// public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {
//
// PDFDoc pdfDoc = new PDFDoc(pdfFile);
//
// ElementWriter writer = new ElementWriter();
// ElementReader reader = new ElementReader();
// Set<Long> visitedXObjIds = new TreeSet<>();
//
// for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
//
// Page page = iterator.next();
//
// visitedXObjIds.add(page.getSDFObj().getObjNum());
//
//
// InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
// .reader(reader)
// .clippingPathStack(new ClippingPathStack(page.getMediaBox()))
// .delta(delta)
// .overlappedElements(new ArrayList<>())
// .visibleElements(new ArrayList<>())
// .visitedXObjIds(visitedXObjIds)
// .build();
//
// removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
//
// context.visitedXObjIds().clear();
//
// removeOverlappedElements(page, writer, context);
// }
//
// try {
// pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
// } catch (Exception e) {
// log.error("File could not be saved after invisible element removal");
// throw new RuntimeException(e);
// }
//
// writer.destroy();
// reader.destroy();
// pdfDoc.close();
// }
//
//
// private void removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(Page page,
// ElementWriter writer,
// InvisibleElementRemovalContext context) throws PDFNetException {
//
// context.reader().begin(page);
// writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
// processElements(writer, context);
// writer.end();
// context.reader().end();
// }
//
//
// private void processElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
//
// for (Element element = context.reader().next(); element != null; element = context.reader().next())
// switch (element.getType()) {
// case Element.e_image, Element.e_inline_image -> processImages(element, writer, context);
// case Element.e_text -> processText(element, writer, context);
// case Element.e_path -> processPath(element, writer, context);
// case Element.e_form -> processForm(element, writer, context);
// case Element.e_group_begin -> {
// context.clippingPathStack().enterNewGState();
// writer.writeElement(element);
// }
// case Element.e_group_end -> {
// context.clippingPathStack().leaveGState();
// writer.writeElement(element);
// }
// default -> writer.writeElement(element);
// }
// }
//
//
// private void processImages(Element imageElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
//
// Rect rect = imageElement.getBBox();
//
// if (rect == null) {
// return;
// }
//
// boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
//
// if (!context.delta() && inClippingPath) {
// context.visibleElements().add(ElementFeatures.extractFeatures(imageElement));
// }
//
// if (context.delta() ^ inClippingPath) {
// writer.writeElement(imageElement);
// }
// }
//
//
// private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
//
// Rect rect = textElement.getBBox();
//
// if (rect == null) {
// writer.writeElement(textElement);
// return;
// }
//
// GState gState = textElement.getGState();
//
// boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
//
// boolean isTextVisible = isTextRenderedVisibly(gState);
//
// if (inClippingPath && isTextVisible) {
// context.visibleElements().add(ElementFeatures.extractFeatures(textElement));
// }
// if (!context.delta()) {
// if (inClippingPath && isTextVisible) {
// writer.writeElement(textElement);
// } else if (textElement.hasTextMatrix()) {
// /*
// A PDFTron Element of type "text" refers to a Tj command. If a Tm command is directly above it in the PDF file, PDFTron joins the two commands and treats them as one Element.
// hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
// Therefore, not writing the first Element would change the position of any following Tj.
// This is why we write only the Tm command:
// */
// writer.writeGStateChanges(textElement);
// }
// } else {
// if (!inClippingPath) {
// gState.setFillColorSpace(ColorSpace.createDeviceRGB());
// // red for elements removed by clipping path
// gState.setFillColor(new ColorPt(1, 0, 0));
// writer.writeElement(textElement);
// }
// if (!isTextVisible) {
// gState.setFillColorSpace(ColorSpace.createDeviceRGB());
// // blue for elements removed due to transparency or not rendered
// gState.setFillColor(new ColorPt(0, 0, 1));
// gState.setTextRenderMode(GState.e_fill_text);
// gState.setFillOpacity(1);
// writer.writeElement(textElement);
// }
// }
// }
//
//
// private void processForm(Element formElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
//
// writer.writeElement(formElement);
// Obj formObj = formElement.getXObject();
//
// if (!context.visitedXObjIds().contains(formObj.getObjNum())) {
// context.visitedXObjIds().add(formObj.getObjNum());
// // writer needs to be newly initialized when entering a new content stream
// // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
// ElementWriter formWriter = new ElementWriter();
// context.reader().formBegin();
// formWriter.begin(formObj);
//
// context.reader().clearChangeList();
// formWriter.setDefaultGState(context.reader());
//
// processElements(formWriter, context);
// formWriter.end();
// formWriter.destroy();
// context.reader().end();
// }
// }
//
//
// private void processPath(Element pathElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
//
// PathData pathData = pathElement.getPathData();
//
// if (pathData.getOperators().length == 0 && pathData.getPoints().length == 0) {
// writer.writeGStateChanges(pathElement);
// return;
// }
//
// GeneralPath linePath = convertToGeneralPath(pathData);
//
// //transform path to initial user space
// var ctm = pathElement.getCTM();
// var affineTransform = toAffineTransform(ctm);
// linePath.transform(affineTransform);
//
// var rect = linePath.getBounds2D();
//
// boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX(), rect.getY(), rect.getWidth(), rect.getHeight());
//
// if (pathElement.isClippingPath()) {
// if (pathElement.isClipWindingFill()) {
// linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
// } else {
// linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
// }
//
// context.clippingPathStack().intersectClippingPath(linePath);
// pathElement.setPathClip(!context.delta());
// writer.writeElement(pathElement);
//
// } else {
// if (pathElement.isWindingFill()) {
// linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
// } else {
// linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
// }
//
// if (inClippingPath) {
// if (isFilledAndNonTransparent(pathElement)) {
// List<ElementFeatures> currentOverlappedElements = context.visibleElements()
// .stream()
// .filter(features -> almostContains(linePath, features.getBoundingBox()))
// .toList();
// context.overlappedElements().addAll(currentOverlappedElements);
// context.visibleElements().removeAll(currentOverlappedElements);
// }
// context.visibleElements().add(ElementFeatures.extractFeatures(pathElement));
// if (!context.delta()) {
// writer.writeElement(pathElement);
// }
// }
// if (context.delta() && !inClippingPath) {
// pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
// pathElement.getGState().setFillColor(new ColorPt(1, 0, 0));
// pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
// pathElement.getGState().setStrokeColor(new ColorPt(1, 0, 0));
// writer.writeElement(pathElement);
// }
// }
// }
//
//
// private void removeOverlappedElements(Page page, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
//
// context.reader().begin(page);
// writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
// if (context.delta()) {
// // green for element removed due to overlapping
// context.overlappedElements().forEach(feature -> drawBBox(writer, feature.getBoundingBox(), "#00FF00"));
// context.overlappedElements().clear();
// }
// processOverlappedElements(writer, context);
// writer.end();
// context.reader().end();
//
// if (context.overlappedElements().size() > 0) {
// log.warn(context.overlappedElements().size() + " overlapped elements have not been found or removed");
// }
// }
//
//
// private void processOverlappedElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
//
// for (Element element = context.reader().next(); element != null; element = context.reader().next()) {
// switch (element.getType()) {
// case Element.e_form -> processFormOverlappedElements(writer, element, context);
// case Element.e_path, Element.e_image, Element.e_inline_image, Element.e_text -> {
// boolean anyMatch = false;
// for (ElementFeatures elementToRemove : context.overlappedElements()) {
// if (elementToRemove.almostMatches(element)) {
// context.overlappedElements().remove(elementToRemove);
// anyMatch = true;
// break;
// }
// }
// if (!anyMatch) {
// writer.writeElement(element);
// } else if (element.getType() == 3 && element.hasTextMatrix()) {
// /*
// A PDFTron Element of type "text" refers to a Tj command. If a Tm command is directly above it in the PDF file, PDFTron joins the two commands and treats them as one Element.
// hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
// Therefore, not writing the first Element would change the position of any following Tj.
// This is why we write only the Tm command:
// */
// writer.writeGStateChanges(element);
// }
// }
// default -> writer.writeElement(element);
// }
// }
// }
//
//
// private void processFormOverlappedElements(ElementWriter writer, Element formElement, InvisibleElementRemovalContext context) throws PDFNetException {
//
// writer.writeElement(formElement);
// Obj formObj = formElement.getXObject();
//
// if (!context.visitedXObjIds().contains(formObj.getObjNum())) {
// context.visitedXObjIds().add(formObj.getObjNum());
// // writer needs to be newly initialized when entering a new content stream
// // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
// ElementWriter formWriter = new ElementWriter();
// context.reader().formBegin();
// formWriter.begin(formObj);
//
// context.reader().clearChangeList();
// formWriter.setDefaultGState(context.reader());
//
// processOverlappedElements(formWriter, context);
// formWriter.end();
// formWriter.destroy();
// context.reader().end();
// }
// }
//
//
// private boolean isTextRenderedVisibly(GState gState) throws PDFNetException {
//
// return gState.getTextRenderMode() != GState.e_invisible_text && //
// !(gState.getTextRenderMode() == GState.e_fill_text && gState.getFillOpacity() == 0) && //
// !(gState.getTextRenderMode() == GState.e_stroke_text && gState.getStrokeOpacity() == 0) && //
// !(gState.getTextRenderMode() == GState.e_fill_stroke_text && gState.getFillOpacity() == 0 && gState.getStrokeOpacity() == 0);
// }
//
//
// private GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {
//
// GeneralPath linePath = new GeneralPath();
// Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
// Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
// for (var operator : operators) {
// switch (operator) {
// case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
// case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
// case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
// case PathData.e_closepath -> linePath.closePath();
// case PathData.e_rect -> {
// double x = points.next();
// double y = points.next();
// double w = points.next();
// double h = points.next();
// linePath.moveTo(x, y);
// linePath.lineTo(x + w, y);
// linePath.lineTo(x + w, y + h);
// linePath.lineTo(x, y + h);
// linePath.closePath();
// }
// default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
// }
// }
// return linePath;
// }
//
//
// private boolean almostContains(Shape outer, Rectangle2D inner) {
// //To address inconsistencies in the calculation of the bounding box we slightly shrink the inner rectangle
//
// double x_with_tolerance = inner.getX() >= 0 ? inner.getX() + TOLERANCE : inner.getX() - TOLERANCE;
// double y_with_tolerance = inner.getY() >= 0 ? inner.getY() + TOLERANCE : inner.getY() - TOLERANCE;
// double height_with_tolerance = inner.getHeight() - (2 * TOLERANCE);
// double width_with_tolerance = inner.getWidth() - (2 * TOLERANCE);
// Rectangle2D innerRect = new Rectangle2D.Double(x_with_tolerance, y_with_tolerance, width_with_tolerance, height_with_tolerance);
//
// return outer.contains(innerRect);
// }
//
//
// private boolean isFilledAndNonTransparent(Element element) throws PDFNetException {
//
// return element.isFilled() && element.getGState().getFillOpacity() == 1;
// }
//
//
// @SneakyThrows
// private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {
//
// ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
// Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
// Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d);
// ElementBuilder eb = new ElementBuilder();
// Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
// rect.setPathStroke(true);
// rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
// rect.getGState().setStrokeColor(colorPt);
// writer.writePlacedElement(rect);
//
// colorPt.destroy();
// eb.destroy();
// }
//
//
// private static AffineTransform toAffineTransform(Matrix2D ctm) throws PDFNetException {
//
// return new AffineTransform(ctm.getA(), ctm.getB(), ctm.getC(), ctm.getD(), ctm.getH(), ctm.getV());
// }
//
//
// @Builder
// private record InvisibleElementRemovalContext(
// boolean delta,
// ElementReader reader,
// ClippingPathStack clippingPathStack,
// List<ElementFeatures> overlappedElements,
// List<ElementFeatures> visibleElements,
// Set<Long> visitedXObjIds) {
//
// }
//
//}

View File

@ -14,6 +14,7 @@ import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings;
@ -69,10 +70,10 @@ public class OCRService {
try (ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) {
try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) {
long removalStart = System.currentTimeMillis();
log.debug("Start invisible element removal for file with dossierId {} and fileId {}", dossierId, fileId);
log.info("Start invisible element removal for file with dossierId {} and fileId {}", dossierId, fileId);
invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false);
long removalEnd = System.currentTimeMillis();
log.debug("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s",
log.info("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s",
dossierId,
fileId,
format("%.1f", (removalEnd - removalStart) / 1000.0));

View File

@ -9,16 +9,18 @@ import java.io.FileOutputStream;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.core.io.ClassPathResource;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.service.ocr.v1.server.AbstractTest;
import lombok.SneakyThrows;
public class InvisibleElementRemovalServiceTest extends AbstractTest {
@Autowired
private InvisibleElementRemovalService invisibleElementRemovalService;
@Autowired
private InvisibleElementRemovalService invisibleElementRemovalService;
@Test