RED-4875 - call logic of new repo pdftron-logic-commons instead of local one

This commit is contained in:
Thomas Beyer 2023-03-17 10:33:48 +01:00
parent 74a094b42d
commit 143538fa40
7 changed files with 725 additions and 708 deletions

View File

@ -23,6 +23,12 @@
<groupId>com.iqser.red.commons</groupId> <groupId>com.iqser.red.commons</groupId>
<artifactId>storage-commons</artifactId> <artifactId>storage-commons</artifactId>
</dependency> </dependency>
<!-- Shared PDFTron logic library. Application imports InvisibleElementRemovalService from
     com.iqser.red.pdftronlogic.commons, presumably provided by this artifact (confirm). -->
<!-- NOTE(review): the version string looks like a feature-branch build; confirm a released version is pinned before merging. -->
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>pdftron-logic-commons</artifactId>
<version>dev_red4875_2_4dc4d</version>
</dependency>
<dependency> <dependency>
<groupId>com.iqser.red.commons</groupId> <groupId>com.iqser.red.commons</groupId>
<artifactId>spring-commons</artifactId> <artifactId>spring-commons</artifactId>

View File

@ -10,6 +10,7 @@ import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Import;
import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.scheduling.annotation.EnableAsync;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient; import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.ocr.v1.server.multitenancy.AsyncConfig; import com.iqser.red.service.ocr.v1.server.multitenancy.AsyncConfig;
@ -44,4 +45,11 @@ public class Application {
return new TimedAspect(registry); return new TimedAspect(registry);
} }
/**
 * Exposes an {@link InvisibleElementRemovalService} as a bean so it can be injected
 * wherever the application needs it.
 *
 * @return a freshly constructed service instance
 */
@Bean
public InvisibleElementRemovalService invisibleElementRemovalService() {
    final InvisibleElementRemovalService removalService = new InvisibleElementRemovalService();
    return removalService;
}
} }

View File

@ -1,68 +1,68 @@
package com.iqser.red.service.ocr.v1.server.model; //package com.iqser.red.service.ocr.v1.server.model;
//
import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE; //import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE;
//
import java.awt.geom.Area; //import java.awt.geom.Area;
import java.awt.geom.GeneralPath; //import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D; //import java.awt.geom.Rectangle2D;
import java.util.Deque; //import java.util.Deque;
import java.util.LinkedList; //import java.util.LinkedList;
//
import com.pdftron.pdf.Rect; //import com.pdftron.pdf.Rect;
//
import lombok.Data; //import lombok.Data;
import lombok.SneakyThrows; //import lombok.SneakyThrows;
//
/**
 * Stack of clipping areas. The top entry is the clipping area currently in effect; a copy is
 * pushed when a new graphics state is entered and popped again when that state is left.
 */
@Data
public class ClippingPathStack {

    private Deque<Area> stack = new LinkedList<>();


    /**
     * Creates a stack whose initial clipping area is the given rectangle.
     *
     * @param rectangle the rectangle used as the outermost clipping area
     */
    @SneakyThrows
    public ClippingPathStack(Rect rectangle) {

        stack.push(new Area(new Rectangle2D.Double(rectangle.getX1(), rectangle.getY1(), rectangle.getWidth(), rectangle.getHeight())));
    }


    /**
     * Restricts the current clipping area to its intersection with the given path.
     *
     * @param path the additional clipping path
     */
    @SneakyThrows
    public void intersectClippingPath(GeneralPath path) {

        getCurrentClippingPath().intersect(new Area(path));
    }


    /**
     * Checks whether the given rectangle, grown by {@code TOLERANCE} on every side, intersects the
     * current clipping area.
     *
     * @param x      x coordinate of the rectangle's origin
     * @param y      y coordinate of the rectangle's origin
     * @param width  width of the rectangle (may be zero for straight lines)
     * @param height height of the rectangle (may be zero for straight lines)
     * @return {@code true} if the enlarged rectangle intersects the current clipping area
     */
    public boolean almostIntersects(double x, double y, double width, double height) {
        // To address inconsistencies in the calculation of the bounding box we slightly increase the rectangle.
        // Height or width are zero for straight lines, even though they are being rendered, so both must end up > 0.
        // The origin always moves by -TOLERANCE, independent of its sign: moving it towards zero (as was done for
        // non-positive coordinates) shifts the rectangle instead of growing it and can miss the original edge.
        double xWithTolerance = x - TOLERANCE;
        double yWithTolerance = y - TOLERANCE;
        double widthWithTolerance = width + (2 * TOLERANCE);
        double heightWithTolerance = height + (2 * TOLERANCE);
        return getCurrentClippingPath().intersects(xWithTolerance, yWithTolerance, widthWithTolerance, heightWithTolerance);
    }


    /**
     * @return the clipping area on top of the stack, or {@code null} if the stack is empty
     */
    public Area getCurrentClippingPath() {

        return stack.peek();
    }


    /**
     * Pushes a copy of the current clipping area, so that later intersections do not affect the
     * area of the enclosing state.
     */
    public void enterNewGState() {

        stack.push(new Area(stack.peek()));
    }


    /**
     * Discards the clipping area of the state being left, restoring the previous one.
     */
    public void leaveGState() {

        stack.pop();
    }

}

View File

@ -1,170 +1,170 @@
package com.iqser.red.service.ocr.v1.server.model; //package com.iqser.red.service.ocr.v1.server.model;
//
import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE; //import static com.iqser.red.service.ocr.v1.server.service.InvisibleElementRemovalService.TOLERANCE;
//
import java.awt.geom.Rectangle2D; //import java.awt.geom.Rectangle2D;
//
import com.pdftron.common.PDFNetException; //import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element; //import com.pdftron.pdf.Element;
import com.pdftron.pdf.Rect; //import com.pdftron.pdf.Rect;
//
import lombok.AccessLevel; //import lombok.AccessLevel;
import lombok.EqualsAndHashCode; //import lombok.EqualsAndHashCode;
import lombok.Getter; //import lombok.Getter;
import lombok.SneakyThrows; //import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults; //import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder; //import lombok.experimental.SuperBuilder;
//
/**
 * Captures the element type and bounding box of a PDFTron {@link Element}, plus type-specific
 * details for paths, text and images, so that an element can be recognised again later.
 * Numeric comparisons use {@code TOLERANCE} instead of exact equality.
 */
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ElementFeatures {

    int elementType;
    Rectangle2D boundingBox;


    /**
     * Builds the feature snapshot matching the type of the given element.
     *
     * @param element the element to describe; must be a path, text, image or inline image
     * @return the extracted features
     * @throws PDFNetException  if reading from the element fails
     * @throws RuntimeException if the element type is not supported
     */
    public static ElementFeatures extractFeatures(Element element) throws PDFNetException {

        return switch (element.getType()) {
            case Element.e_path -> pathFeatures(element);
            case Element.e_text -> textFeatures(element);
            case Element.e_image, Element.e_inline_image -> imageFeatures(element);
            // This technically should never happen, it's a safetynet
            default -> throw new RuntimeException("Feature Extraction is not supported for PDFTron.Element with type: " + element.getType());
        };
    }


    /** Collects the features of a path element. */
    private static ElementFeatures pathFeatures(Element element) throws PDFNetException {

        return Path.builder()
                .elementType(element.getType())
                .boundingBox(toRectangle2D(element.getBBox()))
                .isClippingPath(element.isClippingPath())
                .isClipWindingFill(element.isClipWindingFill())
                .isStroked(element.isStroked())
                .isFilled(element.isFilled())
                .isWindingFill(element.isWindingFill())
                .build();
    }


    /** Collects the features of a text element. */
    private static ElementFeatures textFeatures(Element element) throws PDFNetException {

        return Text.builder()
                .elementType(element.getType())
                .boundingBox(toRectangle2D(element.getBBox()))
                .text(element.getTextString())
                .font(element.getGState().getFont().getType())
                .fontsize(element.getGState().getFontSize())
                .build();
    }


    /** Collects the features of an image or inline image element. */
    private static ElementFeatures imageFeatures(Element element) throws PDFNetException {

        return Image.builder()
                .elementType(element.getType())
                .boundingBox(toRectangle2D(element.getBBox()))
                .dataSize(element.getImageDataSize())
                .height(element.getImageHeight())
                .width(element.getImageWidth())
                .renderingIntent(element.getImageRenderingIntent())
                .componentNum(element.getComponentNum())
                .bitsPerComponent(element.getBitsPerComponent())
                .build();
    }


    /** Converts a PDFTron rectangle into an AWT rectangle with the same origin and size. */
    private static Rectangle2D toRectangle2D(Rect rect) throws PDFNetException {

        return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
    }


    /**
     * Checks whether the element has the recorded type and a bounding box that equals the
     * recorded one within the tolerance.
     *
     * @param element the element to compare against
     * @return {@code true} if type and bounding box match
     * @throws PDFNetException if reading from the element fails
     */
    public boolean almostMatches(Element element) throws PDFNetException {

        if (element.getType() != elementType) {
            return false;
        }
        if (element.getBBox() == null) {
            return false;
        }
        return rectsAlmostMatch(element.getBBox());
    }


    /** Returns {@code true} if both values differ by less than {@code TOLERANCE}. */
    protected boolean almostEqual(double a, double b) {

        return Math.abs(a - b) < TOLERANCE;
    }


    /**
     * Compares origin and size of the given box with the recorded bounding box.
     * To address the inconsistencies in the calculation of the bounding box, equality is checked with a tolerance.
     */
    @SneakyThrows
    private boolean rectsAlmostMatch(Rect bBox) {

        if (!almostEqual(bBox.getX1(), boundingBox.getX())) {
            return false;
        }
        if (!almostEqual(bBox.getY1(), boundingBox.getY())) {
            return false;
        }
        if (!almostEqual(bBox.getWidth(), boundingBox.getWidth())) {
            return false;
        }
        return almostEqual(bBox.getHeight(), boundingBox.getHeight());
    }


    /** Features of a text element: its string, font type and font size. */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    private static class Text extends ElementFeatures {

        String text;
        int font;
        double fontsize;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            if (!super.almostMatches(element)) {
                return false;
            }
            if (!text.equals(element.getTextString())) {
                return false;
            }
            if (font != element.getGState().getFont().getType()) {
                return false;
            }
            return almostEqual(fontsize, element.getGState().getFontSize());
        }

    }

    /** Features of a path element: its clipping, stroke and fill flags. */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    private static class Path extends ElementFeatures {

        boolean isClippingPath;
        boolean isClipWindingFill;
        boolean isStroked;
        boolean isFilled;
        boolean isWindingFill;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            if (!super.almostMatches(element)) {
                return false;
            }
            if (isClippingPath != element.isClippingPath()) {
                return false;
            }
            if (isClipWindingFill != element.isClipWindingFill()) {
                return false;
            }
            if (isStroked != element.isStroked()) {
                return false;
            }
            if (isFilled != element.isFilled()) {
                return false;
            }
            return isWindingFill == element.isWindingFill();
        }

    }

    /** Features of an image element: data size, dimensions, rendering intent and component layout. */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    private static class Image extends ElementFeatures {

        int dataSize;
        int height;
        int width;
        int renderingIntent;
        int componentNum;
        int bitsPerComponent;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            if (!super.almostMatches(element)) {
                return false;
            }
            if (dataSize != element.getImageDataSize()) {
                return false;
            }
            if (height != element.getImageHeight()) {
                return false;
            }
            if (width != element.getImageWidth()) {
                return false;
            }
            if (renderingIntent != element.getImageRenderingIntent()) {
                return false;
            }
            if (componentNum != element.getComponentNum()) {
                return false;
            }
            return bitsPerComponent == element.getBitsPerComponent();
        }

    }

}

View File

@ -1,466 +1,466 @@
package com.iqser.red.service.ocr.v1.server.service; //package com.iqser.red.service.ocr.v1.server.service;
//
import java.awt.Shape; //import java.awt.Shape;
import java.awt.geom.AffineTransform; //import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath; //import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D; //import java.awt.geom.Rectangle2D;
import java.io.InputStream; //import java.io.InputStream;
import java.io.OutputStream; //import java.io.OutputStream;
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.Iterator; //import java.util.Iterator;
import java.util.List; //import java.util.List;
import java.util.Set; //import java.util.Set;
import java.util.TreeSet; //import java.util.TreeSet;
//
import org.springframework.stereotype.Service; //import org.springframework.stereotype.Service;
//
import com.google.common.primitives.Bytes; //import com.google.common.primitives.Bytes;
import com.google.common.primitives.Doubles; //import com.google.common.primitives.Doubles;
import com.iqser.red.service.ocr.v1.server.model.ClippingPathStack; //import com.iqser.red.service.ocr.v1.server.model.ClippingPathStack;
import com.iqser.red.service.ocr.v1.server.model.ElementFeatures; //import com.iqser.red.service.ocr.v1.server.model.ElementFeatures;
import com.pdftron.common.Matrix2D; //import com.pdftron.common.Matrix2D;
import com.pdftron.common.PDFNetException; //import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.ColorPt; //import com.pdftron.pdf.ColorPt;
import com.pdftron.pdf.ColorSpace; //import com.pdftron.pdf.ColorSpace;
import com.pdftron.pdf.Element; //import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementBuilder; //import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementReader; //import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter; //import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.GState; //import com.pdftron.pdf.GState;
import com.pdftron.pdf.PDFDoc; //import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page; //import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator; //import com.pdftron.pdf.PageIterator;
import com.pdftron.pdf.PathData; //import com.pdftron.pdf.PathData;
import com.pdftron.pdf.Rect; //import com.pdftron.pdf.Rect;
import com.pdftron.sdf.Obj; //import com.pdftron.sdf.Obj;
import com.pdftron.sdf.SDFDoc; //import com.pdftron.sdf.SDFDoc;
//
import lombok.Builder; //import lombok.Builder;
import lombok.SneakyThrows; //import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j; //import lombok.extern.slf4j.Slf4j;
//
@Slf4j //@Slf4j
@Service //@Service
public class InvisibleElementRemovalService { //public class InvisibleElementRemovalService {
//
static public final double TOLERANCE = 1e-3; // static public final double TOLERANCE = 1e-3;
//
//
/**
 * Removes all hidden Text, Path and Image Elements from a PDF Document.
 * handled cases:
 * -Text which is transparent or is set to not render
 * -Elements outside of clipping path
 * -Elements that have been painted over by visible and filled Paths
 * unhandled cases:
 * -Elements covered by widely stroked path
 * -Elements with the same color as background
 * -Any Text set to clipping with its many interactions with other elements
 * <p>
 * The PDFTron writer, reader and document are released even if processing or saving fails.
 * Checked exceptions are rethrown unchecked via {@code @SneakyThrows}; a failure while saving
 * is logged and rethrown wrapped in a {@link RuntimeException}.
 *
 * @param pdfFile The PDF file to process
 * @param out     OutputStream to write the resulting file to
 * @param delta   If this flag is set only the removed Elements will be written to the output file.
 *                The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap.
 **/
@SneakyThrows
public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {

    PDFDoc pdfDoc = new PDFDoc(pdfFile);
    try {
        ElementWriter writer = new ElementWriter();
        try {
            ElementReader reader = new ElementReader();
            try {
                Set<Long> visitedXObjIds = new TreeSet<>();

                for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {

                    Page page = iterator.next();

                    visitedXObjIds.add(page.getSDFObj().getObjNum());

                    InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
                            .reader(reader)
                            .clippingPathStack(new ClippingPathStack(page.getMediaBox()))
                            .delta(delta)
                            .overlappedElements(new ArrayList<>())
                            .visibleElements(new ArrayList<>())
                            .visitedXObjIds(visitedXObjIds)
                            .build();

                    // first pass: drop clipped elements and invisible text, remember what gets painted over
                    removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);

                    // the second pass walks the same content again, so visited markers must be reset
                    context.visitedXObjIds().clear();

                    removeOverlappedElements(page, writer, context);
                }

                try {
                    pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
                } catch (Exception e) {
                    // pass the exception to the logger so the cause is not lost in the log
                    log.error("File could not be saved after invisible element removal", e);
                    throw new RuntimeException(e);
                }
            } finally {
                reader.destroy();
            }
        } finally {
            writer.destroy();
        }
    } finally {
        pdfDoc.close();
    }
}
//
//
/**
 * Runs the element-processing pass over one page: the page content is read through the
 * context's reader and rewritten in place through the given writer.
 *
 * @param page    the page to rewrite
 * @param writer  the writer used to replace the page content
 * @param context the per-page processing state
 * @throws PDFNetException if reading or writing the page fails
 */
private void removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(Page page,
        ElementWriter writer,
        InvisibleElementRemovalContext context) throws PDFNetException {

    ElementReader pageReader = context.reader();

    pageReader.begin(page);
    writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());

    processElements(writer, context);

    writer.end();
    pageReader.end();
}
//
//
/**
 * Reads elements from the context's reader until it is exhausted and dispatches each one by type.
 * Images, text, paths and forms go to their dedicated handlers; group begin/end additionally
 * push/pop the clipping path stack; everything else is written through unchanged.
 *
 * @param writer  the writer receiving the elements that are kept
 * @param context the processing state, including the reader and the clipping path stack
 * @throws PDFNetException if reading or writing an element fails
 */
private void processElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    Element current;
    while ((current = context.reader().next()) != null) {
        switch (current.getType()) {
            case Element.e_image, Element.e_inline_image -> processImages(current, writer, context);
            case Element.e_text -> processText(current, writer, context);
            case Element.e_path -> processPath(current, writer, context);
            case Element.e_form -> processForm(current, writer, context);
            case Element.e_group_begin -> {
                context.clippingPathStack().enterNewGState();
                writer.writeElement(current);
            }
            case Element.e_group_end -> {
                context.clippingPathStack().leaveGState();
                writer.writeElement(current);
            }
            default -> writer.writeElement(current);
        }
    }
}
//
//
/**
 * Handles an image or inline image element. Images inside the clipping path are written in
 * normal mode and remembered as visible; in delta mode only images outside the clipping path
 * are written. Images without a bounding box are not written at all.
 *
 * @param imageElement the image element to handle
 * @param writer       the writer receiving the element if it is kept
 * @param context      the processing state
 * @throws PDFNetException if reading or writing the element fails
 */
private void processImages(Element imageElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    Rect bBox = imageElement.getBBox();
    if (bBox == null) {
        // NOTE(review): unlike processText, the element is dropped here instead of written through - confirm this is intended
        return;
    }

    boolean insideClip = context.clippingPathStack().almostIntersects(bBox.getX1(), bBox.getY1(), bBox.getWidth(), bBox.getHeight());
    boolean deltaMode = context.delta();

    if (insideClip && !deltaMode) {
        context.visibleElements().add(ElementFeatures.extractFeatures(imageElement));
    }

    // normal mode keeps what is inside the clip, delta mode keeps what is outside
    if (deltaMode != insideClip) {
        writer.writeElement(imageElement);
    }
}
//
//
/**
 * Handles a text element. Text that is inside the clipping path and rendered visibly is
 * remembered as visible. In normal mode only such text is written; in delta mode the removed
 * text is written instead, recoloured by the reason for its removal.
 *
 * @param textElement the text element to handle
 * @param writer      the writer receiving the element if it is kept
 * @param context     the processing state
 * @throws PDFNetException if reading or writing the element fails
 */
private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    Rect bBox = textElement.getBBox();
    if (bBox == null) {
        writer.writeElement(textElement);
        return;
    }

    GState graphicsState = textElement.getGState();
    boolean insideClip = context.clippingPathStack().almostIntersects(bBox.getX1(), bBox.getY1(), bBox.getWidth(), bBox.getHeight());
    boolean renderedVisibly = isTextRenderedVisibly(graphicsState);
    boolean keep = insideClip && renderedVisibly;

    if (keep) {
        context.visibleElements().add(ElementFeatures.extractFeatures(textElement));
    }

    if (context.delta()) {
        if (!insideClip) {
            graphicsState.setFillColorSpace(ColorSpace.createDeviceRGB());
            // red for elements removed by clipping path
            graphicsState.setFillColor(new ColorPt(1, 0, 0));
            writer.writeElement(textElement);
        }
        if (!renderedVisibly) {
            graphicsState.setFillColorSpace(ColorSpace.createDeviceRGB());
            // blue for elements removed due to transparency or not rendered
            graphicsState.setFillColor(new ColorPt(0, 0, 1));
            graphicsState.setTextRenderMode(GState.e_fill_text);
            graphicsState.setFillOpacity(1);
            writer.writeElement(textElement);
        }
        return;
    }

    if (keep) {
        writer.writeElement(textElement);
    } else if (textElement.hasTextMatrix()) {
        /*
        A PDFTron "text" Element corresponds to a Tj command. When a Tm command sits directly above it in the
        file, PDFTron merges both into one Element; hasTextMatrix() detects exactly that case. Since Tm positions
        the whole BT/ET segment, which may hold several Tj commands, dropping the merged Element would move any
        following Tj. Hence only the Tm command is written:
         */
        writer.writeGStateChanges(textElement);
    }
}
//
//
/**
 * Writes the form element and, unless its XObject was already visited, processes the form's own
 * content stream with a dedicated writer.
 * <p>
 * The dedicated writer is destroyed even if processing the nested content fails.
 *
 * @param formElement the form element to handle
 * @param writer      the writer of the enclosing content stream
 * @param context     the processing state, including the set of visited XObject numbers
 * @throws PDFNetException if reading or writing fails
 */
private void processForm(Element formElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    writer.writeElement(formElement);
    Obj formObj = formElement.getXObject();

    if (context.visitedXObjIds().contains(formObj.getObjNum())) {
        return;
    }
    context.visitedXObjIds().add(formObj.getObjNum());

    // writer needs to be newly initialized when entering a new content stream
    // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
    ElementWriter formWriter = new ElementWriter();
    try {
        context.reader().formBegin();
        formWriter.begin(formObj);

        context.reader().clearChangeList();
        formWriter.setDefaultGState(context.reader());

        processElements(formWriter, context);
        formWriter.end();
    } finally {
        // release the writer on failure as well
        formWriter.destroy();
    }
    context.reader().end();
}
//
//
/**
 * Handles a single path element: registers clipping paths on the clipping stack, tracks visible
 * paths, and records previously visible elements that a filled, opaque path covers.
 *
 * @param pathElement the path element currently read
 * @param writer      the writer the (possibly modified) element is written to
 * @param context     shared state of the current removal run
 * @throws PDFNetException if a PDFTron call fails
 */
private void processPath(Element pathElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    PathData data = pathElement.getPathData();

    // a path without operators and points has no geometry; only its graphics state changes are kept
    boolean hasNoGeometry = data.getOperators().length == 0 && data.getPoints().length == 0;
    if (hasNoGeometry) {
        writer.writeGStateChanges(pathElement);
        return;
    }

    // build the outline and move it into initial user space using the element's CTM
    GeneralPath shape = convertToGeneralPath(data);
    shape.transform(toAffineTransform(pathElement.getCTM()));
    Rectangle2D bounds = shape.getBounds2D();

    // must be evaluated before a clipping path element narrows the clipping stack below
    boolean insideClip = context.clippingPathStack().almostIntersects(bounds.getX(), bounds.getY(), bounds.getWidth(), bounds.getHeight());

    if (pathElement.isClippingPath()) {
        shape.setWindingRule(pathElement.isClipWindingFill() ? GeneralPath.WIND_NON_ZERO : GeneralPath.WIND_EVEN_ODD);
        context.clippingPathStack().intersectClippingPath(shape);
        pathElement.setPathClip(!context.delta());
        writer.writeElement(pathElement);
        return;
    }

    shape.setWindingRule(pathElement.isWindingFill() ? GeneralPath.WIND_NON_ZERO : GeneralPath.WIND_EVEN_ODD);

    if (!insideClip) {
        // outside the clipping area the path is dropped; in delta mode it is written in red instead
        if (context.delta()) {
            GState gState = pathElement.getGState();
            gState.setFillColorSpace(ColorSpace.createDeviceRGB());
            gState.setFillColor(new ColorPt(1, 0, 0));
            gState.setStrokeColorSpace(ColorSpace.createDeviceRGB());
            gState.setStrokeColor(new ColorPt(1, 0, 0));
            writer.writeElement(pathElement);
        }
        return;
    }

    if (isFilledAndNonTransparent(pathElement)) {
        // everything visible so far that lies (almost) inside this opaque fill is now overlapped
        List<ElementFeatures> covered = context.visibleElements()
                .stream()
                .filter(candidate -> almostContains(shape, candidate.getBoundingBox()))
                .toList();
        context.overlappedElements().addAll(covered);
        context.visibleElements().removeAll(covered);
    }

    context.visibleElements().add(ElementFeatures.extractFeatures(pathElement));
    if (!context.delta()) {
        writer.writeElement(pathElement);
    }
}
//
//
/**
 * Re-reads the page and rewrites its content, leaving out the elements collected in
 * {@code context.overlappedElements()}.
 *
 * @param page    the page whose content is replaced
 * @param writer  the writer used to produce the replacement content
 * @param context shared state of the current removal run
 * @throws PDFNetException if a PDFTron call fails
 */
private void removeOverlappedElements(Page page, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    context.reader().begin(page);
    writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());

    if (context.delta()) {
        // green for element removed due to overlapping
        for (ElementFeatures overlapped : context.overlappedElements()) {
            drawBBox(writer, overlapped.getBoundingBox(), "#00FF00");
        }
        // with the list emptied, the rewrite below no longer drops any element
        context.overlappedElements().clear();
    }

    processOverlappedElements(writer, context);
    writer.end();
    context.reader().end();

    // entries still present were never matched against an element while rewriting
    if (!context.overlappedElements().isEmpty()) {
        log.warn(context.overlappedElements().size() + " overlapped elements have not been found or removed");
    }
}
//
//
/**
 * Copies the elements of the reader's current content stream to {@code writer}, skipping every
 * element that matches an entry of {@code context.overlappedElements()}. Form XObjects are
 * delegated to {@link #processFormOverlappedElements}.
 *
 * @param writer  the writer for the content stream being rewritten
 * @param context shared state of the current removal run
 * @throws PDFNetException if a PDFTron call fails
 */
private void processOverlappedElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {

    for (Element element = context.reader().next(); element != null; element = context.reader().next()) {
        switch (element.getType()) {
            case Element.e_form -> processFormOverlappedElements(writer, element, context);
            case Element.e_path, Element.e_image, Element.e_inline_image, Element.e_text -> {
                if (!removeFirstMatchingOverlappedElement(element, context)) {
                    writer.writeElement(element);
                } else if (element.getType() == Element.e_text && element.hasTextMatrix()) {
                    /*
                    PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
                    hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
                    Therefore, the position of a following Tj is affected by not writing the first Element.
                    This is why, we write only the Tm command:
                     */
                    writer.writeGStateChanges(element);
                }
            }
            default -> writer.writeElement(element);
        }
    }
}


/**
 * Removes the first entry of {@code context.overlappedElements()} that almost matches the given
 * element. Removal goes through the iterator so the list is never modified behind a running
 * iteration.
 *
 * @return {@code true} if a matching entry was found and removed
 */
private boolean removeFirstMatchingOverlappedElement(Element element, InvisibleElementRemovalContext context) throws PDFNetException {

    Iterator<ElementFeatures> candidates = context.overlappedElements().iterator();
    while (candidates.hasNext()) {
        if (candidates.next().almostMatches(element)) {
            candidates.remove();
            return true;
        }
    }
    return false;
}
//
//
/**
 * Writes the form element itself and, the first time its XObject is seen, rewrites the XObject's
 * content stream via {@link #processOverlappedElements}.
 *
 * @param writer      the writer of the enclosing content stream
 * @param formElement the form XObject element currently read
 * @param context     shared state of the current removal run
 * @throws PDFNetException if a PDFTron call fails
 */
private void processFormOverlappedElements(ElementWriter writer, Element formElement, InvisibleElementRemovalContext context) throws PDFNetException {

    writer.writeElement(formElement);
    Obj xObject = formElement.getXObject();

    // Set#add reports false when this object number was already recorded, i.e. the form was handled before
    boolean firstVisit = context.visitedXObjIds().add(xObject.getObjNum());
    if (!firstVisit) {
        return;
    }

    // writer needs to be newly initialized when entering a new content stream
    // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
    ElementWriter nestedWriter = new ElementWriter();
    context.reader().formBegin();
    nestedWriter.begin(xObject);

    context.reader().clearChangeList();
    nestedWriter.setDefaultGState(context.reader());

    processOverlappedElements(nestedWriter, context);

    nestedWriter.end();
    nestedWriter.destroy();
    context.reader().end();
}
//
//
/**
 * Decides from the text render mode and the opacities of the graphics state whether text drawn
 * with it is visible.
 *
 * @param gState graphics state of the text element
 * @return {@code false} for invisible render mode, or when every paint operation the mode uses
 *         (fill, stroke, or both) has opacity 0; {@code true} otherwise
 * @throws PDFNetException if a PDFTron call fails
 */
private boolean isTextRenderedVisibly(GState gState) throws PDFNetException {

    int renderMode = gState.getTextRenderMode();

    if (renderMode == GState.e_invisible_text) {
        return false;
    }
    if (renderMode == GState.e_fill_text) {
        return gState.getFillOpacity() != 0;
    }
    if (renderMode == GState.e_stroke_text) {
        return gState.getStrokeOpacity() != 0;
    }
    if (renderMode == GState.e_fill_stroke_text) {
        // hidden only when both the fill and the stroke are fully transparent
        return gState.getFillOpacity() != 0 || gState.getStrokeOpacity() != 0;
    }
    // every other render mode is treated as visible
    return true;
}
//
//
/**
 * Translates PDFTron path data into a {@link GeneralPath} by replaying its operators and
 * consuming their coordinates in order.
 *
 * @param pathData operators and points of a path element
 * @return the equivalent path, in the coordinates given by {@code pathData}
 * @throws PDFNetException if an operator other than moveto, lineto, cubicto, closepath or rect occurs
 */
private GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {

    GeneralPath outline = new GeneralPath();
    Iterator<Double> coords = Doubles.asList(pathData.getPoints()).iterator();

    for (byte op : pathData.getOperators()) {
        switch (op) {
            case PathData.e_moveto -> {
                double x = coords.next();
                double y = coords.next();
                outline.moveTo(x, y);
            }
            case PathData.e_lineto -> {
                double x = coords.next();
                double y = coords.next();
                outline.lineTo(x, y);
            }
            case PathData.e_cubicto -> {
                double c1x = coords.next();
                double c1y = coords.next();
                double c2x = coords.next();
                double c2y = coords.next();
                double endX = coords.next();
                double endY = coords.next();
                outline.curveTo(c1x, c1y, c2x, c2y, endX, endY);
            }
            case PathData.e_closepath -> outline.closePath();
            case PathData.e_rect -> appendRectangle(outline, coords);
            default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
        }
    }
    return outline;
}


/** Reads x, y, width and height from {@code coords} and appends that rectangle as a closed subpath. */
private static void appendRectangle(GeneralPath outline, Iterator<Double> coords) {

    double x = coords.next();
    double y = coords.next();
    double w = coords.next();
    double h = coords.next();
    outline.moveTo(x, y);
    outline.lineTo(x + w, y);
    outline.lineTo(x + w, y + h);
    outline.lineTo(x, y + h);
    outline.closePath();
}
//
//
/**
 * Checks whether {@code outer} contains {@code inner} once {@code inner} has been shrunk by
 * {@code TOLERANCE} on every side.
 *
 * @param outer the shape that may cover the rectangle
 * @param inner the bounding box to test
 * @return {@code true} if the shrunken rectangle lies entirely inside {@code outer}
 */
private boolean almostContains(Shape outer, Rectangle2D inner) {
    //To address inconsistencies in the calculation of the bounding box we slightly shrink the inner rectangle

    // Shrinking always moves the origin (minimum corner) inwards, i.e. towards larger values,
    // regardless of the coordinate's sign. Subtracting the tolerance for negative coordinates
    // would push the corner outside the original box instead.
    // NOTE(review): assumes TOLERANCE is a small positive value - confirm at its declaration.
    double shrunkX = inner.getX() + TOLERANCE;
    double shrunkY = inner.getY() + TOLERANCE;
    double shrunkWidth = inner.getWidth() - (2 * TOLERANCE);
    double shrunkHeight = inner.getHeight() - (2 * TOLERANCE);
    Rectangle2D shrunk = new Rectangle2D.Double(shrunkX, shrunkY, shrunkWidth, shrunkHeight);

    return outer.contains(shrunk);
}
//
//
/**
 * Tells whether the element is filled with a fill opacity of exactly 1.
 *
 * @param element the element to inspect
 * @return {@code true} only for a filled element whose fill opacity equals 1
 * @throws PDFNetException if a PDFTron call fails
 */
private boolean isFilledAndNonTransparent(Element element) throws PDFNetException {

    if (!element.isFilled()) {
        return false;
    }
    double fillOpacity = element.getGState().getFillOpacity();
    return fillOpacity == 1;
}
//
//
/**
 * Writes a stroked rectangle for the given bounding box.
 *
 * @param writer   the writer the rectangle is written to
 * @param r        the bounding box to outline
 * @param hexcolor stroke color as {@code #RRGGBB}
 */
@SneakyThrows
private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {

    // parse the three two-digit hex channels into the 0..1 range
    ColorPt colorPt = new ColorPt(Integer.parseInt(hexcolor.substring(1, 3), 16) / 255d,
            Integer.parseInt(hexcolor.substring(3, 5), 16) / 255d,
            Integer.parseInt(hexcolor.substring(5, 7), 16) / 255d);
    try {
        ElementBuilder eb = new ElementBuilder();
        try {
            Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
            rect.setPathStroke(true);
            rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
            rect.getGState().setStrokeColor(colorPt);
            writer.writePlacedElement(rect);
        } finally {
            // destroy the builder even when creating or writing the rectangle fails
            eb.destroy();
        }
    } finally {
        colorPt.destroy();
    }
}
//
//
/**
 * Converts a PDFTron matrix into an AWT {@link AffineTransform} with the same six coefficients.
 *
 * @param ctm the matrix to convert
 * @return a transform built from a, b, c, d, h and v, in that order
 * @throws PDFNetException if reading a coefficient fails
 */
private static AffineTransform toAffineTransform(Matrix2D ctm) throws PDFNetException {

    double a = ctm.getA();
    double b = ctm.getB();
    double c = ctm.getC();
    double d = ctm.getD();
    double h = ctm.getH();
    double v = ctm.getV();
    return new AffineTransform(a, b, c, d, h, v);
}
//
//
/**
 * State handed to the element-processing methods of this class. The record itself is immutable,
 * but its collections are modified while elements are processed.
 *
 * @param delta              when {@code true}, removed content is highlighted instead of silently dropped:
 *                           paths outside the clipping path are written in red and overlapped elements
 *                           are outlined in green
 * @param reader             the reader the elements are pulled from
 * @param clippingPathStack  clipping state that clipping path elements are intersected into and that
 *                           element bounds are tested against
 * @param overlappedElements features of elements found to be covered by a filled, opaque path; these
 *                           are the elements left out when the content is rewritten
 * @param visibleElements    features of the elements currently considered visible
 * @param visitedXObjIds     object numbers of form XObjects that were already processed
 */
@Builder
private record InvisibleElementRemovalContext(
        boolean delta,
        ElementReader reader,
        ClippingPathStack clippingPathStack,
        List<ElementFeatures> overlappedElements,
        List<ElementFeatures> visibleElements,
        Set<Long> visitedXObjIds) {

}
//
} //}

View File

@ -14,6 +14,7 @@ import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse; import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings; import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings;
@ -69,10 +70,10 @@ public class OCRService {
try (ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) { try (ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) {
try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) { try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) {
long removalStart = System.currentTimeMillis(); long removalStart = System.currentTimeMillis();
log.debug("Start invisible element removal for file with dossierId {} and fileId {}", dossierId, fileId); log.info("Start invisible element removal for file with dossierId {} and fileId {}", dossierId, fileId);
invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false); invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false);
long removalEnd = System.currentTimeMillis(); long removalEnd = System.currentTimeMillis();
log.debug("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s", log.info("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s",
dossierId, dossierId,
fileId, fileId,
format("%.1f", (removalEnd - removalStart) / 1000.0)); format("%.1f", (removalEnd - removalStart) / 1000.0));

View File

@ -9,16 +9,18 @@ import java.io.FileOutputStream;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.ClassPathResource;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.service.ocr.v1.server.AbstractTest; import com.iqser.red.service.ocr.v1.server.AbstractTest;
import lombok.SneakyThrows; import lombok.SneakyThrows;
public class InvisibleElementRemovalServiceTest extends AbstractTest { public class InvisibleElementRemovalServiceTest extends AbstractTest {
@Autowired @Autowired
private InvisibleElementRemovalService invisibleElementRemovalService; private InvisibleElementRemovalService invisibleElementRemovalService;
@Test @Test