RED-8212: fix tables for ocred documents
This commit is contained in:
parent
2caa3e92a4
commit
6b6417ed80
@ -8,13 +8,9 @@ import java.awt.geom.Rectangle2D;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.ColorPt;
|
||||
@ -55,16 +51,17 @@ public class InvisibleElementRemovalService {
|
||||
* -Any Text set to clipping with its many interactions with other elements
|
||||
*
|
||||
* @param pdfFile The PDF file to process
|
||||
* @param removePaths If this flag is set, invisible path elements will be removed
|
||||
* @param delta If this flag is set, only the removed Elements will be written to the output file.
|
||||
* The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap.
|
||||
* @param out OutputStream to write the resulting file to
|
||||
**/
|
||||
@SneakyThrows
|
||||
public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {
|
||||
public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta, boolean removePaths) {
|
||||
|
||||
PDFDoc pdfDoc = new PDFDoc(pdfFile);
|
||||
|
||||
execute(pdfDoc, delta);
|
||||
execute(pdfDoc, delta, removePaths);
|
||||
|
||||
try {
|
||||
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
@ -79,17 +76,36 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
|
||||
/**
|
||||
* This method is similar to {@link #removeInvisibleElements(InputStream, OutputStream, boolean)}, just with a PDFDoc.
|
||||
* This method is equal to {@link #removeInvisibleElements(InputStream, OutputStream, boolean, boolean)}, with removePaths == true.
|
||||
*/
|
||||
@SneakyThrows
|
||||
public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {
|
||||
|
||||
removeInvisibleElements(pdfFile, out, delta, true);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is similar to {@link #removeInvisibleElements(InputStream, OutputStream, boolean, boolean)}, just with a PDFDoc.
|
||||
*/
|
||||
@SneakyThrows
|
||||
public void removeInvisibleElements(PDFDoc pdfDoc, boolean removePaths, boolean delta) {
|
||||
|
||||
execute(pdfDoc, delta, removePaths);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is equal to {@link #removeInvisibleElements(PDFDoc, boolean, boolean)}, with removePaths == true.
|
||||
*/
|
||||
@SneakyThrows
|
||||
public void removeInvisibleElements(PDFDoc pdfDoc, boolean delta) {
|
||||
|
||||
execute(pdfDoc, delta);
|
||||
execute(pdfDoc, delta, true);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void execute(PDFDoc pdfDoc, boolean delta) {
|
||||
private void execute(PDFDoc pdfDoc, boolean delta, boolean removePaths) {
|
||||
|
||||
log.info("Start removing invisible Elements");
|
||||
ElementWriter writer = new ElementWriter();
|
||||
@ -105,6 +121,7 @@ public class InvisibleElementRemovalService {
|
||||
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
|
||||
.reader(reader)
|
||||
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
|
||||
.removePaths(removePaths)
|
||||
.delta(delta)
|
||||
.overlappedElements(new ArrayList<>())
|
||||
.visibleElements(new ArrayList<>())
|
||||
@ -297,11 +314,11 @@ public class InvisibleElementRemovalService {
|
||||
context.visibleElements().removeAll(currentOverlappedElements);
|
||||
}
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement));
|
||||
if (!context.delta()) {
|
||||
if (!context.delta() || !context.removePaths()) {
|
||||
writer.writeElement(pathElement);
|
||||
}
|
||||
}
|
||||
if (context.delta() && !inClippingPath) {
|
||||
if (context.delta() && !inClippingPath && context.removePaths()) {
|
||||
pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
pathElement.getGState().setFillColor(new ColorPt(1, 0, 0));
|
||||
pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
|
||||
@ -336,25 +353,12 @@ public class InvisibleElementRemovalService {
|
||||
for (Element element = context.reader().next(); element != null; element = context.reader().next()) {
|
||||
switch (element.getType()) {
|
||||
case Element.e_form -> processFormOverlappedElements(writer, element, context);
|
||||
case Element.e_path, Element.e_image, Element.e_inline_image, Element.e_text -> {
|
||||
boolean anyMatch = false;
|
||||
for (ElementFeatures elementToRemove : context.overlappedElements()) {
|
||||
if (elementToRemove.almostMatches(element)) {
|
||||
context.overlappedElements().remove(elementToRemove);
|
||||
anyMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!anyMatch) {
|
||||
case Element.e_image, Element.e_inline_image, Element.e_text -> removeOverlappedElement(writer, context, element);
|
||||
case Element.e_path -> {
|
||||
if (context.removePaths()) {
|
||||
removeOverlappedElement(writer, context, element);
|
||||
} else {
|
||||
writer.writeElement(element);
|
||||
} else if (element.getType() == 3 && element.hasTextMatrix()) {
|
||||
/*
|
||||
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
|
||||
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
|
||||
Therefore, the position of a following Tj is affected by not writing the first Element.
|
||||
This is why, we write only the Tm command:
|
||||
*/
|
||||
writer.writeGStateChanges(element);
|
||||
}
|
||||
}
|
||||
default -> writer.writeElement(element);
|
||||
@ -363,6 +367,30 @@ public class InvisibleElementRemovalService {
|
||||
}
|
||||
|
||||
|
||||
private static void removeOverlappedElement(ElementWriter writer, InvisibleElementRemovalContext context, Element element) throws PDFNetException {
|
||||
|
||||
boolean anyMatch = false;
|
||||
for (ElementFeatures elementToRemove : context.overlappedElements()) {
|
||||
if (elementToRemove.almostMatches(element)) {
|
||||
context.overlappedElements().remove(elementToRemove);
|
||||
anyMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!anyMatch) {
|
||||
writer.writeElement(element);
|
||||
} else if (element.getType() == 3 && element.hasTextMatrix()) {
|
||||
/*
|
||||
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
|
||||
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
|
||||
Therefore, the position of a following Tj is affected by not writing the first Element.
|
||||
This is why, we write only the Tm command:
|
||||
*/
|
||||
writer.writeGStateChanges(element);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void processFormOverlappedElements(ElementWriter writer, Element formElement, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
writer.writeElement(formElement);
|
||||
@ -490,6 +518,7 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
@Builder
|
||||
private record InvisibleElementRemovalContext(
|
||||
boolean removePaths,
|
||||
boolean delta,
|
||||
ElementReader reader,
|
||||
ClippingPathStack clippingPathStack,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user