RED-9746: Document hardly editable
* Revert the quadtree lookup: the library does not seem to work reliably, and it gave no significant speed boost.
* Check each individual glyph instead of only the text run, and remember past overlaps in the glyph.
* Add logic to extract all glyphs exactly.
* Check for optional content or transparency in form objects and marked content.
This commit is contained in:
parent
aa3823c9db
commit
01d1b35220
@ -38,6 +38,7 @@ import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.PageIterator;
|
||||
import com.pdftron.pdf.PathData;
|
||||
import com.pdftron.pdf.Rect;
|
||||
import com.pdftron.pdf.ocg.Group;
|
||||
import com.pdftron.sdf.Obj;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
@ -172,7 +173,7 @@ public class InvisibleElementRemovalService {
|
||||
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
|
||||
.reader(reader)
|
||||
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
|
||||
.markedContentStack(new MarkedContentStack())
|
||||
.markedContentStack(new MarkedContentStack(pdfDoc))
|
||||
.removePaths(removePaths)
|
||||
.delta(delta)
|
||||
.overlappedElements(new ElementFeatureLookup())
|
||||
@ -251,10 +252,9 @@ public class InvisibleElementRemovalService {
|
||||
}
|
||||
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
|
||||
|
||||
if (inClippingPath) {
|
||||
ImageFeatures imageFeatures = ElementFeatureFactory.buildImage(imageElement);
|
||||
if (!(imageFeatures.isTransparent() || imageFeatures.isImageMask() || imageFeatures.isSoftMask())) {
|
||||
if (!(context.markedContentStack.contextHasTransparency() || imageFeatures.isTransparent() || imageFeatures.isImageMask() || imageFeatures.isSoftMask())) {
|
||||
calculateOverlaps(context, imageFeatures);
|
||||
}
|
||||
context.visibleElements().add(imageFeatures);
|
||||
@ -328,9 +328,11 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
if (!context.visitedXObjIds().contains(formObj.getObjNum())) {
|
||||
context.visitedXObjIds().add(formObj.getObjNum());
|
||||
|
||||
// writer needs to be newly initialized when entering a new content stream
|
||||
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
|
||||
try (ElementWriter formWriter = new ElementWriter()) {
|
||||
context.markedContentStack.enterForm(formElement);
|
||||
context.clippingPathStack().enterNewGState();
|
||||
try (var formElementBBOX = formElement.getBBox()) {
|
||||
context.clippingPathStack().intersectClippingPath(Converter.toRectangle2D(formElementBBOX));
|
||||
@ -344,6 +346,7 @@ public class InvisibleElementRemovalService {
|
||||
formWriter.end();
|
||||
context.reader().end();
|
||||
context.clippingPathStack().leaveGState();
|
||||
context.markedContentStack.leaveForm();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -386,7 +389,7 @@ public class InvisibleElementRemovalService {
|
||||
}
|
||||
|
||||
if (inClippingPath) {
|
||||
if (isFilledAndNonTransparent(pathElement)) {
|
||||
if (!context.markedContentStack.contextHasTransparency() && isFilledAndNonTransparent(pathElement)) {
|
||||
calculateOverlaps(context, pathFeatures);
|
||||
}
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement));
|
||||
@ -473,8 +476,16 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
private static void removeOverlappedElement(ElementWriter writer, InvisibleElementRemovalContext context, Element element) throws PDFNetException {
|
||||
|
||||
try (Rect bbox = element.getBBox()) {
|
||||
if (bbox == null) {
|
||||
writer.writeElement(element);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Optional<ElementFeatures> optionalElementMatch = context.overlappedElements()
|
||||
.anyMatch(ElementFeatureFactory.extractFeatures(element));
|
||||
|
||||
if (optionalElementMatch.isPresent()) {
|
||||
context.overlappedElements().remove(optionalElementMatch.get());
|
||||
if (element.getType() == 3 && element.hasTextMatrix()) {
|
||||
|
||||
@ -3,16 +3,45 @@ package com.iqser.red.pdftronlogic.commons;
|
||||
import java.util.Deque;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import com.pdftron.pdf.Element;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.ocg.Group;
|
||||
import com.pdftron.sdf.Obj;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
public class MarkedContentStack {
|
||||
|
||||
private final PDFDoc pdfDoc;
|
||||
Deque<MarkedContent> stack = new LinkedList<>();
|
||||
Deque<Form> formStack = new LinkedList<>();
|
||||
|
||||
|
||||
public void enterMarkedContent(String name) {
|
||||
|
||||
stack.push(new MarkedContent(name));
|
||||
stack.push(new MarkedContent(name, name.startsWith("OC")));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Records that a form XObject has been entered by pushing a {@code Form} entry onto {@code formStack}.
 *
 * <p>The entry stores the XObject's object number, whether the XObject has an "OC" entry,
 * and whether its "Group" entry has an "S" entry that is the name "Transparency".
 *
 * <p>Lombok's {@code @SneakyThrows} lets checked exceptions from the PDFTron calls propagate
 * without a {@code throws} clause.
 *
 * @param formElement the form element whose XObject is inspected
 */
@SneakyThrows
|
||||
public void enterForm(Element formElement) {
|
||||
|
||||
// Only the presence of the "OC" entry is used below; its value is not inspected.
Obj oc = formElement.getXObject().findObj("OC");
|
||||
Obj group = formElement.getXObject().findObj("Group");
|
||||
boolean transparency = false;
|
||||
if (group != null) {
|
||||
Obj groupSubType = group.findObj("S");
|
||||
// The form counts as transparent only if "S" exists, is a name, and equals "Transparency".
if (groupSubType != null && groupSubType.isName() && groupSubType.getName().equals("Transparency")) {
|
||||
transparency = true;
|
||||
}
|
||||
}
|
||||
|
||||
formStack.push(new Form(formElement.getXObject().getObjNum(), oc != null, transparency));
|
||||
}
|
||||
|
||||
|
||||
@ -66,7 +95,27 @@ public class MarkedContentStack {
|
||||
}
|
||||
|
||||
|
||||
private record MarkedContent(String name) {
|
||||
/**
 * Reports whether any enclosing form or marked-content entry is flagged as optional content
 * or transparency.
 *
 * @return {@code true} if any entry in {@code formStack} has {@code optionalContent} or
 *         {@code transparency} set, or any entry in {@code stack} has {@code optionalContent} set;
 *         {@code false} otherwise (including when both stacks are empty)
 */
public boolean contextHasTransparency() {
|
||||
|
||||
// Forms are checked first; the marked-content stack is consulted only if no form matches.
return formStack.stream()
|
||||
.anyMatch(form -> form.optionalContent || form.transparency) //
|
||||
|| stack.stream()
|
||||
.anyMatch(MarkedContent::optionalContent);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Removes the most recently pushed entry from {@code formStack}.
 *
 * <p>NOTE(review): {@code Deque.pop()} throws {@code NoSuchElementException} on an empty deque,
 * so this presumably must be paired with a preceding {@code enterForm} call — confirm against callers.
 */
public void leaveForm() {
|
||||
|
||||
formStack.pop();
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
 * Entry of the marked-content stack.
 *
 * @param name            the name passed to {@code enterMarkedContent}
 * @param optionalContent flag read by {@code contextHasTransparency}
 */
private record MarkedContent(String name, boolean optionalContent) {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Entry of the form stack, created in {@code enterForm}.
 *
 * @param ref             object number of the form's XObject
 * @param optionalContent {@code true} if the XObject has an "OC" entry
 * @param transparency    {@code true} if the XObject's "Group" entry has "S" equal to the name "Transparency"
 */
private record Form(long ref, boolean optionalContent, boolean transparency) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -54,7 +54,7 @@ public class VisualEqualityTest {
|
||||
@SneakyThrows
|
||||
public void assertVisualEqualityOfProcessedFile() {
|
||||
|
||||
Path file = Path.of("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/ITEM 19_A15149AC - Primary Skin Irritation Rabbit.pdf");
|
||||
Path file = Path.of("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/SOLICITA_VICTRATO-GOLD-II_Item 20_Sensibilizacao_02.pdf");
|
||||
Context context = new Context(TEST_OUTPUT_DIR, new HashMap<>());
|
||||
|
||||
runForFile(file, context);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user