Merge branch 'RED-8359' into 'master'

RED-8359 - Migrate to gradle

Closes RED-8359

See merge request redactmanager/commons/pdftron-logic-commons!23
This commit is contained in:
Andrei Isvoran 2024-02-08 10:11:09 +01:00
commit 73d77624e9
19 changed files with 377 additions and 286 deletions

8
.gitignore vendored
View File

@ -27,3 +27,11 @@
**/classpath-data.json
**/dependencies-and-licenses-overview.txt
git.tag
gradle.properties
gradlew
gradlew.bat
gradle/
**/.gradle
**/build

View File

@ -1,4 +1,5 @@
include:
- project: 'gitlab/gitlab'
ref: 'main'
file: 'ci-templates/maven_deps.yml'
file: 'ci-templates/gradle_java.yml'

102
build.gradle.kts Normal file
View File

@ -0,0 +1,102 @@
// Gradle build for the pdftron-logic-commons library.
// Compiles for Java 17, runs PMD / Checkstyle / JaCoCo, reports to SonarQube
// and publishes the library (with a Javadoc jar) to the knecon Nexus.

plugins {
    `java-library`
    `maven-publish`
    // NOTE(review): `kotlin-dsl` is intended for build logic (buildSrc / plugin projects);
    // it is not needed to use a build.gradle.kts script. Kept to avoid changing the
    // published artifact set here - confirm whether it can be dropped.
    `kotlin-dsl`
    pmd
    checkstyle
    jacoco
    id("io.freefair.lombok") version "8.4"
    id("org.sonarqube") version "4.0.0.2929"
}

repositories {
    mavenLocal()
    maven {
        url = uri("https://pdftron.com/maven/release")
    }
    maven {
        url = uri("https://nexus.knecon.com/repository/gindev/")
        // Credentials come from the `mavenUser` / `mavenPassword` Gradle properties;
        // they resolve to null when the properties are not set.
        credentials {
            username = providers.gradleProperty("mavenUser").orNull
            password = providers.gradleProperty("mavenPassword").orNull
        }
    }
    mavenCentral()
}

dependencies {
    api("org.projectlombok:lombok:1.18.30")
    api("org.springframework.boot:spring-boot-configuration-processor:3.2.2")
    api("com.google.guava:guava:33.0.0-jre")
    api("com.pdftron:PDFNet:10.3.0")

    testImplementation("org.junit.jupiter:junit-jupiter:5.10.2")
    testImplementation("org.assertj:assertj-core:3.24.2")
    testImplementation("org.mockito:mockito-core:5.2.0")
    testImplementation("org.apache.logging.log4j:log4j-slf4j2-impl:2.22.1")

    // The logging API is only needed at compile time; consumers bring their own binding.
    compileOnly("org.slf4j:slf4j-api:2.0.11")
}

group = "com.iqser.red.commons"
description = "pdftron-logic-commons"

java {
    sourceCompatibility = JavaVersion.VERSION_17
    targetCompatibility = JavaVersion.VERSION_17
    withJavadocJar()
}

publishing {
    publications {
        create<MavenPublication>(name) {
            from(components["java"])
        }
    }
    repositories {
        maven {
            url = uri("https://nexus.knecon.com/repository/red-platform-releases/")
            credentials {
                username = providers.gradleProperty("mavenUser").orNull
                password = providers.gradleProperty("mavenPassword").orNull
            }
        }
    }
}

pmd {
    isConsoleOutput = true
    // Rule sets are supplied per task via ruleSetFiles below. Clear the built-in
    // default so the rules excluded in the XML files are not re-enabled by it.
    ruleSets = listOf()
}

// Configure the rule set on each task itself. Assigning `pmd.ruleSetFiles` inside a
// task block writes to the shared project extension, so the task configured last
// would silently decide the rule set for both main and test sources.
tasks.pmdMain {
    ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
}

tasks.pmdTest {
    ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml")
}

tasks.test {
    useJUnitPlatform()
    reports {
        junitXml.outputLocation.set(layout.buildDirectory.dir("reports/junit"))
    }
    // Always produce the coverage report after the tests have run.
    finalizedBy(tasks.jacocoTestReport)
}

tasks.jacocoTestReport {
    dependsOn(tasks.test)
    reports {
        xml.required.set(true)
        csv.required.set(false)
        html.outputLocation.set(layout.buildDirectory.dir("jacocoHtml"))
    }
}

sonarqube {
    properties {
        // Token is read from the `sonarToken` Gradle property (null when absent).
        property("sonar.login", providers.gradleProperty("sonarToken").getOrNull())
        property("sonar.host.url", "https://sonarqube.knecon.com")
    }
}

View File

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Checkstyle configuration for this project. -->
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<module name="Checker">
<!-- Severity set on the root Checker module: violations are reported as errors. -->
<property
name="severity"
value="error"/>
<!-- Checks nested in TreeWalker; they cover annotations, Javadoc, imports, modifiers, blocks and coding rules. -->
<module name="TreeWalker">
<!-- Presumably paired with SuppressWarningsFilter at the bottom of this file so that @SuppressWarnings can silence checks; confirm against the Checkstyle docs. -->
<module name="SuppressWarningsHolder"/>
<module name="MissingDeprecated"/>
<module name="MissingOverride"/>
<module name="AnnotationLocation"/>
<module name="JavadocStyle"/>
<module name="NonEmptyAtclauseDescription"/>
<module name="IllegalImport"/>
<module name="RedundantImport"/>
<module name="RedundantModifier"/>
<module name="EmptyBlock"/>
<module name="DefaultComesLast"/>
<module name="EmptyStatement"/>
<module name="EqualsHashCode"/>
<module name="ExplicitInitialization"/>
<module name="IllegalInstantiation"/>
<module name="ModifiedControlVariable"/>
<module name="MultipleVariableDeclarations"/>
<module name="PackageDeclaration"/>
<module name="ParameterAssignment"/>
<module name="SimplifyBooleanExpression"/>
<module name="SimplifyBooleanReturn"/>
<module name="StringLiteralEquality"/>
<module name="OneStatementPerLine"/>
<module name="FinalClass"/>
<module name="ArrayTypeStyle"/>
<module name="UpperEll"/>
<module name="OuterTypeFilename"/>
</module>
<!-- Modules registered directly on Checker, outside TreeWalker. -->
<module name="FileTabCharacter"/>
<module name="SuppressWarningsFilter"/>
</module>

20
config/pmd/pmd.xml Normal file
View File

@ -0,0 +1,20 @@
<?xml version="1.0"?>
<!-- PMD ruleset stored as config/pmd/pmd.xml. -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon ruleset checks the code for bad stuff
</description>
<!-- References PMD's Java "errorprone" category; the rules named in the exclude elements are left out. -->
<rule ref="category/java/errorprone.xml">
<exclude name="MissingSerialVersionUID"/>
<exclude name="AvoidLiteralsInIfCondition"/>
<exclude name="AvoidDuplicateLiterals"/>
<exclude name="NullAssignment"/>
<exclude name="AssignmentInOperand"/>
<exclude name="BeanMembersShouldSerialize"/>
</rule>
</ruleset>

22
config/pmd/test_pmd.xml Normal file
View File

@ -0,0 +1,22 @@
<?xml version="1.0"?>
<!-- PMD ruleset stored as config/pmd/test_pmd.xml. -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon test ruleset checks the code for bad stuff
</description>
<!-- References PMD's Java "errorprone" category; the rules named in the exclude elements, including TestClassWithoutTestCases, are left out. -->
<rule ref="category/java/errorprone.xml">
<exclude name="MissingSerialVersionUID"/>
<exclude name="AvoidLiteralsInIfCondition"/>
<exclude name="AvoidDuplicateLiterals"/>
<exclude name="NullAssignment"/>
<exclude name="AssignmentInOperand"/>
<exclude name="TestClassWithoutTestCases"/>
<exclude name="BeanMembersShouldSerialize"/>
</rule>
</ruleset>

1
gradle.properties.kts Normal file
View File

@ -0,0 +1 @@
version = 2.0-SNAPSHOT

94
pom.xml
View File

@ -1,94 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>platform-dependency</artifactId>
<groupId>com.iqser.red</groupId>
<version>2.2.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<artifactId>pdftron-logic-commons</artifactId>
<groupId>com.iqser.red.commons</groupId>
<version>2.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>2.20.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>com.pdftron</groupId>
<artifactId>PDFNet</artifactId>
<version>10.3.0</version>
<scope>provided</scope>
</dependency>
<!-- Test Dependencies -->
</dependencies>
<build>
<plugins>
<plugin>
<!-- create a test jar for the api classes to be used by other modules -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<repositories>
<repository>
<id>pdftron</id>
<name>PDFNet Maven</name>
<url>https://pdftron.com/maven/release</url>
</repository>
</repositories>
</project>

1
settings.gradle.kts Normal file
View File

@ -0,0 +1 @@
// Sets the root project name for this Gradle build.
rootProject.name = "pdftron-logic-commons"

View File

@ -62,10 +62,9 @@ public class Converter {
@SneakyThrows
public static Color convertColor(ColorSpace colorSpace, ColorPt colorPt) {
ColorPt rgbColor = colorSpace.convert2RGB(colorPt);
Color color = new Color((float) rgbColor.get(0), (float) rgbColor.get(1), (float) rgbColor.get(2));
rgbColor.destroy();
return color;
try (ColorPt rgbColor = colorSpace.convert2RGB(colorPt)) {
return new Color((float) rgbColor.get(0), (float) rgbColor.get(1), (float) rgbColor.get(2));
}
}

View File

@ -103,6 +103,7 @@ public class ElementFeatures {
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@SuppressWarnings("PMD")
public static class Text extends ElementFeatures {
String text;

View File

@ -35,17 +35,16 @@ public class ImageHashFactory {
@SneakyThrows
private byte[] getBytesOfImage(com.pdftron.pdf.Image inputImage) {
// 0 because the memory filter determines the size
var memFilter = new MemoryFilter(0, false);
var filterWriter = new FilterWriter(memFilter);
try(var memFilter = new MemoryFilter(0, false);
var filterWriter = new FilterWriter(memFilter)) {
inputImage.export(filterWriter);
filterWriter.flushAll();
byte[] res = memFilter.getBuffer();
inputImage.export(filterWriter);
filterWriter.flushAll();
byte[] res = memFilter.getBuffer();
memFilter.flushAll();
memFilter.destroy();
filterWriter.destroy();
return res;
memFilter.flushAll();
return res;
}
}

View File

@ -151,39 +151,36 @@ public class InvisibleElementRemovalService {
private void execute(PDFDoc pdfDoc, boolean delta, boolean removePaths, Set<String> markedContentToIgnore) {
log.info("Start removing invisible Elements");
ElementWriter writer = new ElementWriter();
ElementReader reader = new ElementReader();
Set<Long> visitedXObjIds = new TreeSet<>();
try(ElementWriter writer = new ElementWriter();
ElementReader reader = new ElementReader()) {
Set<Long> visitedXObjIds = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
Page page = iterator.next();
visitedXObjIds.add(page.getSDFObj().getObjNum());
visitedXObjIds.add(page.getSDFObj().getObjNum());
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
.reader(reader)
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
.markedContentStack(new MarkedContentStack())
.removePaths(removePaths)
.delta(delta)
.overlappedElements(new ArrayList<>())
.visibleElements(new ArrayList<>())
.visitedXObjIds(visitedXObjIds)
.markedContentToIgnore(markedContentToIgnore)
.build();
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
.reader(reader)
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
.markedContentStack(new MarkedContentStack())
.removePaths(removePaths)
.delta(delta)
.overlappedElements(new ArrayList<>())
.visibleElements(new ArrayList<>())
.visitedXObjIds(visitedXObjIds)
.markedContentToIgnore(markedContentToIgnore)
.build();
removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
context.visitedXObjIds().clear();
context.markedContentStack().clear();
removeOverlappedElements(page, writer, context);
context.visitedXObjIds().clear();
context.markedContentStack().clear();
removeOverlappedElements(page, writer, context);
}
}
writer.destroy();
reader.destroy();
log.info("Finished removing invisible Elements");
}
@ -239,68 +236,70 @@ public class InvisibleElementRemovalService {
private void processImages(Element imageElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
Rect rect = imageElement.getBBox();
try(Rect rect = imageElement.getBBox()) {
if (rect == null) {
return;
}
if (rect == null) {
return;
}
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
if (!context.delta() && inClippingPath) {
context.visibleElements().add(ElementFeatureFactory.extractFeatures(imageElement));
}
if (!context.delta() && inClippingPath) {
context.visibleElements().add(ElementFeatureFactory.extractFeatures(imageElement));
}
if (context.delta() ^ inClippingPath) {
writer.writeElement(imageElement);
if (context.delta() ^ inClippingPath) {
writer.writeElement(imageElement);
}
}
}
private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
Rect textBBox = textElement.getBBox();
try(Rect textBBox = textElement.getBBox()) {
if (textBBox == null) {
writer.writeElement(textElement);
return;
}
GState gState = textElement.getGState();
boolean inClippingPath = context.clippingPathStack().almostIntersects(textBBox.getX1(), textBBox.getY1(), textBBox.getWidth(), textBBox.getHeight());
boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context);
if (inClippingPath && isTextVisible) {
context.visibleElements().add(ElementFeatureFactory.extractFeatures(textElement));
}
if (!context.delta()) {
if (inClippingPath && isTextVisible) {
if (textBBox == null) {
writer.writeElement(textElement);
} else if (textElement.hasTextMatrix()) {
return;
}
GState gState = textElement.getGState();
boolean inClippingPath = context.clippingPathStack().almostIntersects(textBBox.getX1(), textBBox.getY1(), textBBox.getWidth(), textBBox.getHeight());
boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context);
if (inClippingPath && isTextVisible) {
context.visibleElements().add(ElementFeatureFactory.extractFeatures(textElement));
}
if (!context.delta()) {
if (inClippingPath && isTextVisible) {
writer.writeElement(textElement);
} else if (textElement.hasTextMatrix()) {
/*
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
Therefore, the position of a following Tj is affected by not writing the first Element.
This is why, we write only the Tm command:
*/
writer.writeGStateChanges(textElement);
}
} else {
if (!inClippingPath) {
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
// red for elements removed by clipping path
gState.setFillColor(new ColorPt(1, 0, 0));
writer.writeElement(textElement);
}
if (!isTextVisible) {
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
// blue for elements removed due to transparency or not rendered or same color as background
gState.setFillColor(new ColorPt(0, 0, 1));
gState.setTextRenderMode(GState.e_fill_text);
gState.setFillOpacity(1);
writer.writeElement(textElement);
writer.writeGStateChanges(textElement);
}
} else {
if (!inClippingPath) {
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
// red for elements removed by clipping path
gState.setFillColor(new ColorPt(1, 0, 0));
writer.writeElement(textElement);
}
if (!isTextVisible) {
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
// blue for elements removed due to transparency or not rendered or same color as background
gState.setFillColor(new ColorPt(0, 0, 1));
gState.setTextRenderMode(GState.e_fill_text);
gState.setFillOpacity(1);
writer.writeElement(textElement);
}
}
}
}
@ -315,20 +314,20 @@ public class InvisibleElementRemovalService {
context.visitedXObjIds().add(formObj.getObjNum());
// writer needs to be newly initialized when entering a new content stream
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
ElementWriter formWriter = new ElementWriter();
context.clippingPathStack().enterNewGState();
context.clippingPathStack().intersectClippingPath(new GeneralPath(Converter.toRectangle2D(formElement.getBBox())));
context.reader().formBegin();
formWriter.begin(formObj);
try(ElementWriter formWriter = new ElementWriter()) {
context.clippingPathStack().enterNewGState();
context.clippingPathStack().intersectClippingPath(new GeneralPath(Converter.toRectangle2D(formElement.getBBox())));
context.reader().formBegin();
formWriter.begin(formObj);
context.reader().clearChangeList();
formWriter.setDefaultGState(context.reader());
context.reader().clearChangeList();
formWriter.setDefaultGState(context.reader());
processElements(formWriter, context);
formWriter.end();
formWriter.destroy();
context.reader().end();
context.clippingPathStack().leaveGState();
processElements(formWriter, context);
formWriter.end();
context.reader().end();
context.clippingPathStack().leaveGState();
}
}
}
@ -481,17 +480,17 @@ public class InvisibleElementRemovalService {
context.visitedXObjIds().add(formObj.getObjNum());
// writer needs to be newly initialized when entering a new content stream
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
ElementWriter formWriter = new ElementWriter();
context.reader().formBegin();
formWriter.begin(formObj);
try(ElementWriter formWriter = new ElementWriter()) {
context.reader().formBegin();
formWriter.begin(formObj);
context.reader().clearChangeList();
formWriter.setDefaultGState(context.reader());
context.reader().clearChangeList();
formWriter.setDefaultGState(context.reader());
processOverlappedElements(formWriter, context);
formWriter.end();
formWriter.destroy();
context.reader().end();
processOverlappedElements(formWriter, context);
formWriter.end();
context.reader().end();
}
}
}
@ -582,18 +581,16 @@ public class InvisibleElementRemovalService {
@SneakyThrows
private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {
ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
try(ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d);
ElementBuilder eb = new ElementBuilder();
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
rect.setPathStroke(true);
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
rect.getGState().setStrokeColor(colorPt);
writer.writePlacedElement(rect);
colorPt.destroy();
eb.destroy();
ElementBuilder eb = new ElementBuilder()) {
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
rect.setPathStroke(true);
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
rect.getGState().setStrokeColor(colorPt);
writer.writePlacedElement(rect);
}
}

View File

@ -7,33 +7,33 @@ import java.util.Set;
public class MarkedContentStack {
Deque<MarkedContent> markedContentStack = new LinkedList<>();
Deque<MarkedContent> stack = new LinkedList<>();
public void enterMarkedContent(String name) {
markedContentStack.push(new MarkedContent(name));
stack.push(new MarkedContent(name));
}
public void leaveMarkedContent() {
markedContentStack.pop();
stack.pop();
}
public String currentMarkedContent() {
if (markedContentStack.isEmpty()) {
if (stack.isEmpty()) {
return "";
}
return markedContentStack.peek().name();
return stack.peek().name();
}
public boolean currentMarkedContentContains(String name) {
Iterator<MarkedContent> markedContentIterator = markedContentStack.descendingIterator();
Iterator<MarkedContent> markedContentIterator = stack.descendingIterator();
while (markedContentIterator.hasNext()) {
var markedContent = markedContentIterator.next();
if (markedContent.name().equals(name)) {
@ -46,10 +46,10 @@ public class MarkedContentStack {
public boolean currentMarkedContentContainsAny(Set<String> names) {
if (markedContentStack.isEmpty()) {
if (stack.isEmpty()) {
return false;
}
Iterator<MarkedContent> markedContentIterator = markedContentStack.descendingIterator();
Iterator<MarkedContent> markedContentIterator = stack.descendingIterator();
while (markedContentIterator.hasNext()) {
var markedContent = markedContentIterator.next();
if (names.contains(markedContent.name())) {
@ -62,7 +62,7 @@ public class MarkedContentStack {
public void clear() {
markedContentStack.clear();
stack.clear();
}

View File

@ -43,18 +43,16 @@ public class OCGWatermarkRemovalService {
@SneakyThrows
private void removeOCGWatermarks(PDFDoc pdfDoc) {
ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter();
Set<Long> visitedXObjIds = new TreeSet<>();
try(ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter()) {
Set<Long> visitedXObjIds = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
Page page = iterator.next();
writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
}
}
reader.destroy();
writer.destroy();
}
@ -124,17 +122,17 @@ public class OCGWatermarkRemovalService {
visitedXObjIds.add(element.getXObject().getObjNum());
// writer needs to be newly initialized when entering a new content stream
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
ElementWriter formWriter = new ElementWriter();
reader.formBegin();
formWriter.begin(element.getXObject());
try(ElementWriter formWriter = new ElementWriter()) {
reader.formBegin();
formWriter.begin(element.getXObject());
reader.clearChangeList();
formWriter.setDefaultGState(reader);
reader.clearChangeList();
formWriter.setDefaultGState(reader);
processElements(page, reader, formWriter, visitedXObjIds);
formWriter.end();
formWriter.destroy();
reader.end();
processElements(page, reader, formWriter, visitedXObjIds);
formWriter.end();
reader.end();
}
}
}

View File

@ -16,20 +16,20 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class PdfTextExtraction {
private static String execute(PDFDoc pdfDoc) throws IOException, PDFNetException{
TextExtractor extractor = new TextExtractor();
List<String> texts = new ArrayList<>();
private static String execute(PDFDoc pdfDoc) throws PDFNetException{
try(TextExtractor extractor = new TextExtractor()) {
List<String> texts = new ArrayList<>();
PageIterator iterator = pdfDoc.getPageIterator();
while (iterator.hasNext()) {
Page page = iterator.next();
extractor.begin(page);
texts.add(extractor.getAsText());
PageIterator iterator = pdfDoc.getPageIterator();
while (iterator.hasNext()) {
Page page = iterator.next();
extractor.begin(page);
texts.add(extractor.getAsText());
}
pdfDoc.close();
return String.join("\n", texts);
}
extractor.destroy();
pdfDoc.close();
return String.join("\n", texts);
}
public static String extractAllTextFromDocument(InputStream fileStream) throws IOException, PDFNetException {

View File

@ -93,27 +93,26 @@ public class WatermarkRemovalService {
Map<Long, List<ElementFeatures>> formObjectsAndImagesForPages = new HashMap<>();
Set<Long> visitedXObjIds = new TreeSet<>();
ElementReader reader = new ElementReader();
try(ElementReader reader = new ElementReader()) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
Page page = iterator.next();
double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
LinkedList<ElementFeatures> elementFeaturesLinkedList = new LinkedList<>();
LinkedList<ElementFeatures> elementFeaturesLinkedList = new LinkedList<>();
reader.begin(page);
for (Element element = reader.next(); element != null; element = reader.next()) {
processElement(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringFromPage, page);
reader.begin(page);
for (Element element = reader.next(); element != null; element = reader.next()) {
processElement(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringFromPage, page);
}
formObjectsAndImagesForPages.put(page.getSDFObj().getObjNum(), elementFeaturesLinkedList);
}
formObjectsAndImagesForPages.put(page.getSDFObj().getObjNum(), elementFeaturesLinkedList);
return formObjectsAndImagesForPages;
}
reader.destroy();
return formObjectsAndImagesForPages;
}
@ -220,13 +219,13 @@ public class WatermarkRemovalService {
}
if (visitedXObjIds.add(element.getXObject().getObjNum())) {
ElementReader xObjectReader = new ElementReader();
xObjectReader.begin(element.getXObject());
for (Element element1 = xObjectReader.next(); element1 != null; element1 = xObjectReader.next()) {
processElement(element1, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
try(ElementReader xObjectReader = new ElementReader()) {
xObjectReader.begin(element.getXObject());
for (Element element1 = xObjectReader.next(); element1 != null; element1 = xObjectReader.next()) {
processElement(element1, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
}
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
}
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
xObjectReader.destroy();
} else {
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
}
@ -256,21 +255,17 @@ public class WatermarkRemovalService {
@SneakyThrows
private void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter();
Set<Long> visitedXObjIds = new TreeSet<>();
try(ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter()) {
Set<Long> visitedXObjIds = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
writeAllElementsExceptWatermarks(page, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
Page page = iterator.next();
writeAllElementsExceptWatermarks(page, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
}
}
reader.destroy();
writer.destroy();
}
@ -390,17 +385,17 @@ public class WatermarkRemovalService {
visitedXObjIds.add(element.getXObject().getObjNum());
// writer needs to be newly initialized when entering a new content stream
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
ElementWriter formWriter = new ElementWriter();
reader.formBegin();
formWriter.begin(element.getXObject());
try(ElementWriter formWriter = new ElementWriter()) {
reader.formBegin();
formWriter.begin(element.getXObject());
reader.clearChangeList();
formWriter.setDefaultGState(reader);
reader.clearChangeList();
formWriter.setDefaultGState(reader);
processElements(page, reader, formWriter, watermarksElementFeaturesList, visitedXObjIds);
formWriter.end();
formWriter.destroy();
reader.end();
processElements(page, reader, formWriter, watermarksElementFeaturesList, visitedXObjIds);
formWriter.end();
reader.end();
}
}
}

View File

@ -13,6 +13,7 @@ import com.pdftron.pdf.PDFNet;
import lombok.SneakyThrows;
@SuppressWarnings("PMD")
class InvisibleElementRemovalServiceTest {
InvisibleElementRemovalService invisibleElementRemovalService;

View File

@ -13,6 +13,7 @@ import com.pdftron.pdf.PDFNet;
import lombok.SneakyThrows;
@Disabled
@SuppressWarnings("PMD")
class WatermarkRemovalServiceTest {
@SneakyThrows