RED-4875 - refactored code so it can be called with both a PDFDoc and input/output streams

This commit is contained in:
Thomas Beyer 2023-03-17 11:31:48 +01:00
parent 4dc4ddd63a
commit e00e5e5317

View File

@ -12,8 +12,6 @@ import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.springframework.stereotype.Service;
import com.google.common.primitives.Bytes;
import com.google.common.primitives.Doubles;
import com.pdftron.common.Matrix2D;
@ -38,33 +36,13 @@ import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
public class InvisibleElementRemovalService {
static public final double TOLERANCE = 1e-3;
/**
* Removes all hidden Text, Path and Image Elements from a PDF Document.
* handled cases:
* -Text which is transparent or is set to not render
* -Elements outside of clipping path
* -Elements that have been painted over by visible and filled Paths
* unhandled cases:
* -Elements covered by widely stroked path
* -Elements with the same color as background
* -Any Text set to clipping with its many interactions with other elements
*
* @param pdfFile The PDF file to process
* @param delta If this flag is set only the removed Elements will be written to the output file.
* The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap.
* @param out OutputStream to write the resulting file to
**/
@SneakyThrows
public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {
PDFDoc pdfDoc = new PDFDoc(pdfFile);
private void execute(PDFDoc pdfDoc, boolean delta) {
ElementWriter writer = new ElementWriter();
ElementReader reader = new ElementReader();
Set<Long> visitedXObjIds = new TreeSet<>();
@ -90,21 +68,49 @@ public class InvisibleElementRemovalService {
context.visitedXObjIds().clear();
removeOverlappedElements(page, writer, context);
writer.destroy();
reader.destroy();
}
}
/**
 * Removes all hidden Text, Path and Image Elements from a PDF Document and writes the result to a stream.
 * handled cases:
 * -Text which is transparent or is set to not render
 * -Elements outside of clipping path
 * -Elements that have been painted over by visible and filled Paths
 * unhandled cases:
 * -Elements covered by widely stroked path
 * -Elements with the same color as background
 * -Any Text set to clipping with its many interactions with other elements
 *
 * The document opened from {@code pdfFile} is always closed before this method returns, even if processing
 * or saving fails. Neither {@code pdfFile} nor {@code out} is closed by this method.
 *
 * @param pdfFile The PDF file to process
 * @param out     OutputStream to write the resulting file to
 * @param delta   If this flag is set only the removed Elements will be written to the output file.
 *                The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap.
 * @throws RuntimeException if the processed document cannot be saved to {@code out}
 **/
@SneakyThrows
public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {

    PDFDoc pdfDoc = new PDFDoc(pdfFile);
    try {
        // The ElementReader/ElementWriter are created and destroyed inside execute(); nothing to clean up here.
        execute(pdfDoc, delta);

        try {
            // Save exactly once: a second save would append a second copy of the document to the same stream.
            pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
        } catch (Exception e) {
            log.error("File could not be saved after invisible element removal", e);
            throw new RuntimeException(e);
        }
    } finally {
        // Release the document even when execute() or save() throws.
        pdfDoc.close();
    }
}
/**
 * Runs the invisible element removal on an already opened document.
 * Delegates to {@code execute}; unlike the stream based overload, this method neither saves nor closes
 * the document, so both remain the responsibility of the caller.
 *
 * @param pdfDoc The opened PDF document to process
 * @param delta  Passed through unchanged to {@code execute}
 **/
@SneakyThrows
public void removeInvisibleElements(PDFDoc pdfDoc, boolean delta) {
execute(pdfDoc, delta);
}
private void removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(Page page,
ElementWriter writer,
InvisibleElementRemovalContext context) throws PDFNetException {