This commit is contained in:
Andrei Isvoran 2024-02-07 15:06:30 +02:00
parent 4f7f516348
commit e969f79a20
8 changed files with 60 additions and 87 deletions

View File

@ -48,9 +48,9 @@ public class ElementFeatureFactory {
}
private static ElementFeatures.ElementFeaturesText buildText(Element element) throws PDFNetException {
private static ElementFeatures.Text buildText(Element element) throws PDFNetException {
return ElementFeatures.ElementFeaturesText.builder()
return ElementFeatures.Text.builder()
.elementType(element.getType())
.boundingBox(Converter.toRectangle2D(element.getBBox()))
.text(element.getTextString())

View File

@ -103,7 +103,8 @@ public class ElementFeatures {
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public static class ElementFeaturesText extends ElementFeatures {
@SuppressWarnings("PMD")
public static class Text extends ElementFeatures {
String text;
int font;

View File

@ -35,18 +35,15 @@ public class ImageHashFactory {
@SneakyThrows
private byte[] getBytesOfImage(com.pdftron.pdf.Image inputImage) {
// 0 because the memory filter determines the size
try(var memFilter = new MemoryFilter(0, false)) {
try(var filterWriter = new FilterWriter(memFilter)) {
try(var memFilter = new MemoryFilter(0, false);
var filterWriter = new FilterWriter(memFilter)) {
inputImage.export(filterWriter);
filterWriter.flushAll();
byte[] res = memFilter.getBuffer();
inputImage.export(filterWriter);
filterWriter.flushAll();
byte[] res = memFilter.getBuffer();
memFilter.flushAll();
memFilter.destroy();
filterWriter.destroy();
return res;
}
memFilter.flushAll();
return res;
}
}

View File

@ -151,38 +151,34 @@ public class InvisibleElementRemovalService {
private void execute(PDFDoc pdfDoc, boolean delta, boolean removePaths, Set<String> markedContentToIgnore) {
log.info("Start removing invisible Elements");
try(ElementWriter writer = new ElementWriter()) {
try(ElementReader reader = new ElementReader()) {
Set<Long> visitedXObjIds = new TreeSet<>();
try(ElementWriter writer = new ElementWriter();
ElementReader reader = new ElementReader()) {
Set<Long> visitedXObjIds = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
Page page = iterator.next();
visitedXObjIds.add(page.getSDFObj().getObjNum());
visitedXObjIds.add(page.getSDFObj().getObjNum());
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
.reader(reader)
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
.markedContentStack(new MarkedContentStack())
.removePaths(removePaths)
.delta(delta)
.overlappedElements(new ArrayList<>())
.visibleElements(new ArrayList<>())
.visitedXObjIds(visitedXObjIds)
.markedContentToIgnore(markedContentToIgnore)
.build();
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
.reader(reader)
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
.markedContentStack(new MarkedContentStack())
.removePaths(removePaths)
.delta(delta)
.overlappedElements(new ArrayList<>())
.visibleElements(new ArrayList<>())
.visitedXObjIds(visitedXObjIds)
.markedContentToIgnore(markedContentToIgnore)
.build();
removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
context.visitedXObjIds().clear();
context.markedContentStack().clear();
context.visitedXObjIds().clear();
context.markedContentStack().clear();
removeOverlappedElements(page, writer, context);
}
writer.destroy();
reader.destroy();
removeOverlappedElements(page, writer, context);
}
}
log.info("Finished removing invisible Elements");
@ -329,7 +325,6 @@ public class InvisibleElementRemovalService {
processElements(formWriter, context);
formWriter.end();
formWriter.destroy();
context.reader().end();
context.clippingPathStack().leaveGState();
}
@ -494,7 +489,6 @@ public class InvisibleElementRemovalService {
processOverlappedElements(formWriter, context);
formWriter.end();
formWriter.destroy();
context.reader().end();
}
}
@ -589,17 +583,13 @@ public class InvisibleElementRemovalService {
try(ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d)) {
try(ElementBuilder eb = new ElementBuilder()) {
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
rect.setPathStroke(true);
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
rect.getGState().setStrokeColor(colorPt);
writer.writePlacedElement(rect);
colorPt.destroy();
eb.destroy();
}
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d);
ElementBuilder eb = new ElementBuilder()) {
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
rect.setPathStroke(true);
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
rect.getGState().setStrokeColor(colorPt);
writer.writePlacedElement(rect);
}
}

View File

@ -7,33 +7,33 @@ import java.util.Set;
public class MarkedContentStack {
Deque<MarkedContent> markedContentQueue = new LinkedList<>();
Deque<MarkedContent> stack = new LinkedList<>();
public void enterMarkedContent(String name) {
markedContentQueue.push(new MarkedContent(name));
stack.push(new MarkedContent(name));
}
public void leaveMarkedContent() {
markedContentQueue.pop();
stack.pop();
}
public String currentMarkedContent() {
if (markedContentQueue.isEmpty()) {
if (stack.isEmpty()) {
return "";
}
return markedContentQueue.peek().name();
return stack.peek().name();
}
public boolean currentMarkedContentContains(String name) {
Iterator<MarkedContent> markedContentIterator = markedContentQueue.descendingIterator();
Iterator<MarkedContent> markedContentIterator = stack.descendingIterator();
while (markedContentIterator.hasNext()) {
var markedContent = markedContentIterator.next();
if (markedContent.name().equals(name)) {
@ -46,10 +46,10 @@ public class MarkedContentStack {
public boolean currentMarkedContentContainsAny(Set<String> names) {
if (markedContentQueue.isEmpty()) {
if (stack.isEmpty()) {
return false;
}
Iterator<MarkedContent> markedContentIterator = markedContentQueue.descendingIterator();
Iterator<MarkedContent> markedContentIterator = stack.descendingIterator();
while (markedContentIterator.hasNext()) {
var markedContent = markedContentIterator.next();
if (names.contains(markedContent.name())) {
@ -62,7 +62,7 @@ public class MarkedContentStack {
public void clear() {
markedContentQueue.clear();
stack.clear();
}

View File

@ -43,18 +43,14 @@ public class OCGWatermarkRemovalService {
@SneakyThrows
private void removeOCGWatermarks(PDFDoc pdfDoc) {
try(ElementReader reader = new ElementReader()) {
try(ElementWriter writer = new ElementWriter()) {
Set<Long> visitedXObjIds = new TreeSet<>();
try(ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter()) {
Set<Long> visitedXObjIds = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
}
reader.destroy();
writer.destroy();
Page page = iterator.next();
writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
}
}
}
@ -135,7 +131,6 @@ public class OCGWatermarkRemovalService {
processElements(page, reader, formWriter, visitedXObjIds);
formWriter.end();
formWriter.destroy();
reader.end();
}
}

View File

@ -27,7 +27,6 @@ public class PdfTextExtraction {
texts.add(extractor.getAsText());
}
extractor.destroy();
pdfDoc.close();
return String.join("\n", texts);
}

View File

@ -111,8 +111,6 @@ public class WatermarkRemovalService {
formObjectsAndImagesForPages.put(page.getSDFObj().getObjNum(), elementFeaturesLinkedList);
}
reader.destroy();
return formObjectsAndImagesForPages;
}
}
@ -227,7 +225,6 @@ public class WatermarkRemovalService {
processElement(element1, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
}
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
xObjectReader.destroy();
}
} else {
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
@ -258,20 +255,15 @@ public class WatermarkRemovalService {
@SneakyThrows
private void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
try(ElementReader reader = new ElementReader()) {
try (ElementWriter writer = new ElementWriter()) {
Set<Long> visitedXObjIds = new TreeSet<>();
try(ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter()) {
Set<Long> visitedXObjIds = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
Page page = iterator.next();
writeAllElementsExceptWatermarks(page, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
}
reader.destroy();
writer.destroy();
writeAllElementsExceptWatermarks(page, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
}
}
}
@ -402,7 +394,6 @@ public class WatermarkRemovalService {
processElements(page, reader, formWriter, watermarksElementFeaturesList, visitedXObjIds);
formWriter.end();
formWriter.destroy();
reader.end();
}
}