gradle draft
This commit is contained in:
parent
3bbd13daca
commit
46c9bed663
8
.gitignore
vendored
8
.gitignore
vendored
@ -27,3 +27,11 @@
|
||||
**/classpath-data.json
|
||||
**/dependencies-and-licenses-overview.txt
|
||||
git.tag
|
||||
|
||||
gradle.properties
|
||||
gradlew
|
||||
gradlew.bat
|
||||
gradle/
|
||||
|
||||
**/.gradle
|
||||
**/build
|
||||
|
||||
@ -1,4 +1,21 @@
|
||||
include:
|
||||
- project: 'gitlab/gitlab'
|
||||
ref: 'main'
|
||||
file: 'ci-templates/maven_deps.yml'
|
||||
file: 'ci-templates/gradle_java.yml'
|
||||
|
||||
deploy:
|
||||
stage: deploy
|
||||
tags:
|
||||
- dind
|
||||
script:
|
||||
- echo "Building with gradle version ${BUILDVERSION}"
|
||||
- gradle -Pversion=${BUILDVERSION} publish
|
||||
- gradle bootBuildImage --cleanCache --publishImage -PbuildbootDockerHostNetwork=true -Pversion=${BUILDVERSION}
|
||||
- echo "BUILDVERSION=$BUILDVERSION" >> version.env
|
||||
artifacts:
|
||||
reports:
|
||||
dotenv: version.env
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
- if: $CI_COMMIT_BRANCH =~ /^release/
|
||||
- if: $CI_COMMIT_TAG
|
||||
|
||||
136
build.gradle.kts
Normal file
136
build.gradle.kts
Normal file
@ -0,0 +1,136 @@
|
||||
plugins {
|
||||
`java-library`
|
||||
`maven-publish`
|
||||
`kotlin-dsl`
|
||||
pmd
|
||||
checkstyle
|
||||
jacoco
|
||||
id("io.freefair.lombok") version "8.4"
|
||||
}
|
||||
|
||||
java.sourceCompatibility = JavaVersion.VERSION_17
|
||||
java.targetCompatibility = JavaVersion.VERSION_17
|
||||
|
||||
repositories {
|
||||
mavenLocal()
|
||||
maven {
|
||||
url = uri("https://nexus.knecon.com/repository/gindev/")
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("https://pdftron.com/maven/release")
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("https://repo.spring.io/milestone")
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("http://dist.wso2.org/maven2/")
|
||||
isAllowInsecureProtocol = true
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("https://projectlombok.org/edge-releases")
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("https://repo.maven.apache.org/maven2/")
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("https://pdftron.com/maven/release")
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("https://nexus.knecon.com/repository/red-platform-releases/")
|
||||
credentials {
|
||||
username = providers.gradleProperty("mavenUser").getOrNull();
|
||||
password = providers.gradleProperty("mavenPassword").getOrNull();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
api(libs.org.projectlombok.lombok)
|
||||
api(libs.org.springframework.boot.spring.boot.configuration.processor)
|
||||
api(libs.com.google.guava.guava)
|
||||
api(libs.org.projectlombok.lombok)
|
||||
api(libs.org.springframework.boot.spring.boot.configuration.processor)
|
||||
implementation("com.pdftron:PDFNet:10.5.0")
|
||||
testImplementation(libs.org.junit.jupiter.junit.jupiter)
|
||||
testImplementation(libs.org.assertj.assertj.core)
|
||||
testImplementation(libs.org.mockito.mockito.core)
|
||||
testImplementation(libs.org.apache.logging.log4j.log4j.slf4j2.impl)
|
||||
testImplementation(libs.org.junit.jupiter.junit.jupiter)
|
||||
testImplementation(libs.org.assertj.assertj.core)
|
||||
testImplementation(libs.org.mockito.mockito.core)
|
||||
compileOnly(libs.org.slf4j.slf4j.api)
|
||||
compileOnly(libs.com.pdftron.pdfnet)
|
||||
}
|
||||
|
||||
group = "com.iqser.red.commons"
|
||||
version = "2.0-SNAPSHOT"
|
||||
description = "pdftron-logic-commons"
|
||||
java.sourceCompatibility = JavaVersion.VERSION_17
|
||||
|
||||
val testsJar by tasks.registering(Jar::class) {
|
||||
archiveClassifier.set("tests")
|
||||
from(sourceSets["test"].output)
|
||||
}
|
||||
|
||||
java {
|
||||
withSourcesJar()
|
||||
withJavadocJar()
|
||||
}
|
||||
|
||||
publishing {
|
||||
publications.create<MavenPublication>("maven") {
|
||||
from(components["java"])
|
||||
artifact(testsJar)
|
||||
}
|
||||
}
|
||||
|
||||
tasks.withType<JavaCompile>() {
|
||||
options.encoding = "UTF-8"
|
||||
}
|
||||
|
||||
tasks.withType<Javadoc>() {
|
||||
options.encoding = "UTF-8"
|
||||
}
|
||||
|
||||
pmd {
|
||||
isConsoleOutput = true
|
||||
}
|
||||
|
||||
tasks.pmdMain {
|
||||
pmd.ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
|
||||
}
|
||||
|
||||
tasks.pmdTest {
|
||||
pmd.ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml")
|
||||
}
|
||||
|
||||
tasks.named<Test>("test") {
|
||||
useJUnitPlatform()
|
||||
reports {
|
||||
junitXml.outputLocation.set(layout.buildDirectory.dir("reports/junit"))
|
||||
}
|
||||
}
|
||||
|
||||
tasks.test {
|
||||
finalizedBy(tasks.jacocoTestReport) // report is always generated after tests run
|
||||
}
|
||||
|
||||
tasks.jacocoTestReport {
|
||||
dependsOn(tasks.test) // tests are required to run before generating the report
|
||||
reports {
|
||||
xml.required.set(true)
|
||||
csv.required.set(false)
|
||||
html.outputLocation.set(layout.buildDirectory.dir("jacocoHtml"))
|
||||
}
|
||||
}
|
||||
|
||||
java {
|
||||
withJavadocJar()
|
||||
}
|
||||
39
config/checkstyle/checkstyle.xml
Normal file
39
config/checkstyle/checkstyle.xml
Normal file
@ -0,0 +1,39 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
||||
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
||||
<module name="Checker">
|
||||
<property
|
||||
name="severity"
|
||||
value="error"/>
|
||||
<module name="TreeWalker">
|
||||
<module name="SuppressWarningsHolder"/>
|
||||
<module name="MissingDeprecated"/>
|
||||
<module name="MissingOverride"/>
|
||||
<module name="AnnotationLocation"/>
|
||||
<module name="JavadocStyle"/>
|
||||
<module name="NonEmptyAtclauseDescription"/>
|
||||
<module name="IllegalImport"/>
|
||||
<module name="RedundantImport"/>
|
||||
<module name="RedundantModifier"/>
|
||||
<module name="EmptyBlock"/>
|
||||
<module name="DefaultComesLast"/>
|
||||
<module name="EmptyStatement"/>
|
||||
<module name="EqualsHashCode"/>
|
||||
<module name="ExplicitInitialization"/>
|
||||
<module name="IllegalInstantiation"/>
|
||||
<module name="ModifiedControlVariable"/>
|
||||
<module name="MultipleVariableDeclarations"/>
|
||||
<module name="PackageDeclaration"/>
|
||||
<module name="ParameterAssignment"/>
|
||||
<module name="SimplifyBooleanExpression"/>
|
||||
<module name="SimplifyBooleanReturn"/>
|
||||
<module name="StringLiteralEquality"/>
|
||||
<module name="OneStatementPerLine"/>
|
||||
<module name="FinalClass"/>
|
||||
<module name="ArrayTypeStyle"/>
|
||||
<module name="UpperEll"/>
|
||||
<module name="OuterTypeFilename"/>
|
||||
</module>
|
||||
<module name="FileTabCharacter"/>
|
||||
<module name="SuppressWarningsFilter"/>
|
||||
</module>
|
||||
20
config/pmd/pmd.xml
Normal file
20
config/pmd/pmd.xml
Normal file
@ -0,0 +1,20 @@
|
||||
<?xml version="1.0"?>
|
||||
<ruleset name="Custom ruleset"
|
||||
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
|
||||
|
||||
<description>
|
||||
Knecon ruleset checks the code for bad stuff
|
||||
</description>
|
||||
|
||||
<rule ref="category/java/errorprone.xml">
|
||||
<exclude name="MissingSerialVersionUID"/>
|
||||
<exclude name="AvoidLiteralsInIfCondition"/>
|
||||
<exclude name="AvoidDuplicateLiterals"/>
|
||||
<exclude name="NullAssignment"/>
|
||||
<exclude name="AssignmentInOperand"/>
|
||||
<exclude name="BeanMembersShouldSerialize"/>
|
||||
</rule>
|
||||
|
||||
</ruleset>
|
||||
22
config/pmd/test_pmd.xml
Normal file
22
config/pmd/test_pmd.xml
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0"?>
|
||||
<ruleset name="Custom ruleset"
|
||||
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
|
||||
|
||||
<description>
|
||||
Knecon test ruleset checks the code for bad stuff
|
||||
</description>
|
||||
|
||||
|
||||
<rule ref="category/java/errorprone.xml">
|
||||
<exclude name="MissingSerialVersionUID"/>
|
||||
<exclude name="AvoidLiteralsInIfCondition"/>
|
||||
<exclude name="AvoidDuplicateLiterals"/>
|
||||
<exclude name="NullAssignment"/>
|
||||
<exclude name="AssignmentInOperand"/>
|
||||
<exclude name="TestClassWithoutTestCases"/>
|
||||
<exclude name="BeanMembersShouldSerialize"/>
|
||||
</rule>
|
||||
|
||||
</ruleset>
|
||||
1
settings.gradle.kts
Normal file
1
settings.gradle.kts
Normal file
@ -0,0 +1 @@
|
||||
rootProject.name = "pdftron-logic-commons"
|
||||
@ -62,10 +62,11 @@ public class Converter {
|
||||
@SneakyThrows
|
||||
public static Color convertColor(ColorSpace colorSpace, ColorPt colorPt) {
|
||||
|
||||
ColorPt rgbColor = colorSpace.convert2RGB(colorPt);
|
||||
Color color = new Color((float) rgbColor.get(0), (float) rgbColor.get(1), (float) rgbColor.get(2));
|
||||
rgbColor.destroy();
|
||||
return color;
|
||||
try (ColorPt rgbColor = colorSpace.convert2RGB(colorPt)) {
|
||||
Color color = new Color((float) rgbColor.get(0), (float) rgbColor.get(1), (float) rgbColor.get(2));
|
||||
rgbColor.destroy();
|
||||
return color;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -103,6 +103,7 @@ public class ElementFeatures {
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
@SuppressWarnings("PMD")
|
||||
public static class Text extends ElementFeatures {
|
||||
|
||||
String text;
|
||||
|
||||
@ -35,17 +35,19 @@ public class ImageHashFactory {
|
||||
@SneakyThrows
|
||||
private byte[] getBytesOfImage(com.pdftron.pdf.Image inputImage) {
|
||||
// 0 because the memory filter determines the size
|
||||
var memFilter = new MemoryFilter(0, false);
|
||||
var filterWriter = new FilterWriter(memFilter);
|
||||
try(var memFilter = new MemoryFilter(0, false)) {
|
||||
try(var filterWriter = new FilterWriter(memFilter)) {
|
||||
|
||||
inputImage.export(filterWriter);
|
||||
filterWriter.flushAll();
|
||||
byte[] res = memFilter.getBuffer();
|
||||
inputImage.export(filterWriter);
|
||||
filterWriter.flushAll();
|
||||
byte[] res = memFilter.getBuffer();
|
||||
|
||||
memFilter.flushAll();
|
||||
memFilter.destroy();
|
||||
filterWriter.destroy();
|
||||
return res;
|
||||
memFilter.flushAll();
|
||||
memFilter.destroy();
|
||||
filterWriter.destroy();
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -151,39 +151,40 @@ public class InvisibleElementRemovalService {
|
||||
private void execute(PDFDoc pdfDoc, boolean delta, boolean removePaths, Set<String> markedContentToIgnore) {
|
||||
|
||||
log.info("Start removing invisible Elements");
|
||||
ElementWriter writer = new ElementWriter();
|
||||
ElementReader reader = new ElementReader();
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
try(ElementWriter writer = new ElementWriter()) {
|
||||
try(ElementReader reader = new ElementReader()) {
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
|
||||
Page page = iterator.next();
|
||||
Page page = iterator.next();
|
||||
|
||||
visitedXObjIds.add(page.getSDFObj().getObjNum());
|
||||
visitedXObjIds.add(page.getSDFObj().getObjNum());
|
||||
|
||||
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
|
||||
.reader(reader)
|
||||
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
|
||||
.markedContentStack(new MarkedContentStack())
|
||||
.removePaths(removePaths)
|
||||
.delta(delta)
|
||||
.overlappedElements(new ArrayList<>())
|
||||
.visibleElements(new ArrayList<>())
|
||||
.visitedXObjIds(visitedXObjIds)
|
||||
.markedContentToIgnore(markedContentToIgnore)
|
||||
.build();
|
||||
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
|
||||
.reader(reader)
|
||||
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
|
||||
.markedContentStack(new MarkedContentStack())
|
||||
.removePaths(removePaths)
|
||||
.delta(delta)
|
||||
.overlappedElements(new ArrayList<>())
|
||||
.visibleElements(new ArrayList<>())
|
||||
.visitedXObjIds(visitedXObjIds)
|
||||
.markedContentToIgnore(markedContentToIgnore)
|
||||
.build();
|
||||
|
||||
removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
|
||||
removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
|
||||
|
||||
context.visitedXObjIds().clear();
|
||||
context.markedContentStack().clear();
|
||||
context.visitedXObjIds().clear();
|
||||
context.markedContentStack().clear();
|
||||
|
||||
removeOverlappedElements(page, writer, context);
|
||||
removeOverlappedElements(page, writer, context);
|
||||
|
||||
}
|
||||
writer.destroy();
|
||||
reader.destroy();
|
||||
}
|
||||
}
|
||||
writer.destroy();
|
||||
reader.destroy();
|
||||
|
||||
log.info("Finished removing invisible Elements");
|
||||
}
|
||||
|
||||
@ -239,68 +240,70 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
private void processImages(Element imageElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
Rect rect = imageElement.getBBox();
|
||||
try(Rect rect = imageElement.getBBox()) {
|
||||
|
||||
if (rect == null) {
|
||||
return;
|
||||
}
|
||||
if (rect == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
|
||||
|
||||
if (!context.delta() && inClippingPath) {
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(imageElement));
|
||||
}
|
||||
if (!context.delta() && inClippingPath) {
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(imageElement));
|
||||
}
|
||||
|
||||
if (context.delta() ^ inClippingPath) {
|
||||
writer.writeElement(imageElement);
|
||||
if (context.delta() ^ inClippingPath) {
|
||||
writer.writeElement(imageElement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
Rect textBBox = textElement.getBBox();
|
||||
try(Rect textBBox = textElement.getBBox()) {
|
||||
|
||||
if (textBBox == null) {
|
||||
writer.writeElement(textElement);
|
||||
return;
|
||||
}
|
||||
|
||||
GState gState = textElement.getGState();
|
||||
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(textBBox.getX1(), textBBox.getY1(), textBBox.getWidth(), textBBox.getHeight());
|
||||
|
||||
boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context);
|
||||
|
||||
if (inClippingPath && isTextVisible) {
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(textElement));
|
||||
}
|
||||
if (!context.delta()) {
|
||||
if (inClippingPath && isTextVisible) {
|
||||
if (textBBox == null) {
|
||||
writer.writeElement(textElement);
|
||||
} else if (textElement.hasTextMatrix()) {
|
||||
return;
|
||||
}
|
||||
|
||||
GState gState = textElement.getGState();
|
||||
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(textBBox.getX1(), textBBox.getY1(), textBBox.getWidth(), textBBox.getHeight());
|
||||
|
||||
boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context);
|
||||
|
||||
if (inClippingPath && isTextVisible) {
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(textElement));
|
||||
}
|
||||
if (!context.delta()) {
|
||||
if (inClippingPath && isTextVisible) {
|
||||
writer.writeElement(textElement);
|
||||
} else if (textElement.hasTextMatrix()) {
|
||||
/*
|
||||
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
|
||||
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
|
||||
Therefore, the position of a following Tj is affected by not writing the first Element.
|
||||
This is why, we write only the Tm command:
|
||||
*/
|
||||
writer.writeGStateChanges(textElement);
|
||||
}
|
||||
} else {
|
||||
if (!inClippingPath) {
|
||||
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
// red for elements removed by clipping path
|
||||
gState.setFillColor(new ColorPt(1, 0, 0));
|
||||
writer.writeElement(textElement);
|
||||
}
|
||||
if (!isTextVisible) {
|
||||
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
// blue for elements removed due to transparency or not rendered or same color as background
|
||||
gState.setFillColor(new ColorPt(0, 0, 1));
|
||||
gState.setTextRenderMode(GState.e_fill_text);
|
||||
gState.setFillOpacity(1);
|
||||
writer.writeElement(textElement);
|
||||
writer.writeGStateChanges(textElement);
|
||||
}
|
||||
} else {
|
||||
if (!inClippingPath) {
|
||||
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
// red for elements removed by clipping path
|
||||
gState.setFillColor(new ColorPt(1, 0, 0));
|
||||
writer.writeElement(textElement);
|
||||
}
|
||||
if (!isTextVisible) {
|
||||
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
// blue for elements removed due to transparency or not rendered or same color as background
|
||||
gState.setFillColor(new ColorPt(0, 0, 1));
|
||||
gState.setTextRenderMode(GState.e_fill_text);
|
||||
gState.setFillOpacity(1);
|
||||
writer.writeElement(textElement);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -315,20 +318,21 @@ public class InvisibleElementRemovalService {
|
||||
context.visitedXObjIds().add(formObj.getObjNum());
|
||||
// writer needs to be newly initialized when entering a new content stream
|
||||
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
|
||||
ElementWriter formWriter = new ElementWriter();
|
||||
context.clippingPathStack().enterNewGState();
|
||||
context.clippingPathStack().intersectClippingPath(new GeneralPath(Converter.toRectangle2D(formElement.getBBox())));
|
||||
context.reader().formBegin();
|
||||
formWriter.begin(formObj);
|
||||
try(ElementWriter formWriter = new ElementWriter()) {
|
||||
context.clippingPathStack().enterNewGState();
|
||||
context.clippingPathStack().intersectClippingPath(new GeneralPath(Converter.toRectangle2D(formElement.getBBox())));
|
||||
context.reader().formBegin();
|
||||
formWriter.begin(formObj);
|
||||
|
||||
context.reader().clearChangeList();
|
||||
formWriter.setDefaultGState(context.reader());
|
||||
context.reader().clearChangeList();
|
||||
formWriter.setDefaultGState(context.reader());
|
||||
|
||||
processElements(formWriter, context);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
context.reader().end();
|
||||
context.clippingPathStack().leaveGState();
|
||||
processElements(formWriter, context);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
context.reader().end();
|
||||
context.clippingPathStack().leaveGState();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -481,17 +485,18 @@ public class InvisibleElementRemovalService {
|
||||
context.visitedXObjIds().add(formObj.getObjNum());
|
||||
// writer needs to be newly initialized when entering a new content stream
|
||||
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
|
||||
ElementWriter formWriter = new ElementWriter();
|
||||
context.reader().formBegin();
|
||||
formWriter.begin(formObj);
|
||||
try(ElementWriter formWriter = new ElementWriter()) {
|
||||
context.reader().formBegin();
|
||||
formWriter.begin(formObj);
|
||||
|
||||
context.reader().clearChangeList();
|
||||
formWriter.setDefaultGState(context.reader());
|
||||
context.reader().clearChangeList();
|
||||
formWriter.setDefaultGState(context.reader());
|
||||
|
||||
processOverlappedElements(formWriter, context);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
context.reader().end();
|
||||
processOverlappedElements(formWriter, context);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
context.reader().end();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -582,18 +587,20 @@ public class InvisibleElementRemovalService {
|
||||
@SneakyThrows
|
||||
private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {
|
||||
|
||||
ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
|
||||
try(ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
|
||||
Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
|
||||
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d);
|
||||
ElementBuilder eb = new ElementBuilder();
|
||||
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
|
||||
rect.setPathStroke(true);
|
||||
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
|
||||
rect.getGState().setStrokeColor(colorPt);
|
||||
writer.writePlacedElement(rect);
|
||||
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d)) {
|
||||
try(ElementBuilder eb = new ElementBuilder()) {
|
||||
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
|
||||
rect.setPathStroke(true);
|
||||
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
|
||||
rect.getGState().setStrokeColor(colorPt);
|
||||
writer.writePlacedElement(rect);
|
||||
|
||||
colorPt.destroy();
|
||||
eb.destroy();
|
||||
colorPt.destroy();
|
||||
eb.destroy();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -7,33 +7,33 @@ import java.util.Set;
|
||||
|
||||
public class MarkedContentStack {
|
||||
|
||||
Deque<MarkedContent> markedContentStack = new LinkedList<>();
|
||||
Deque<MarkedContent> markedContentQueue = new LinkedList<>();
|
||||
|
||||
|
||||
public void enterMarkedContent(String name) {
|
||||
|
||||
markedContentStack.push(new MarkedContent(name));
|
||||
markedContentQueue.push(new MarkedContent(name));
|
||||
}
|
||||
|
||||
|
||||
public void leaveMarkedContent() {
|
||||
|
||||
markedContentStack.pop();
|
||||
markedContentQueue.pop();
|
||||
}
|
||||
|
||||
|
||||
public String currentMarkedContent() {
|
||||
|
||||
if (markedContentStack.isEmpty()) {
|
||||
if (markedContentQueue.isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
return markedContentStack.peek().name();
|
||||
return markedContentQueue.peek().name();
|
||||
}
|
||||
|
||||
|
||||
public boolean currentMarkedContentContains(String name) {
|
||||
|
||||
Iterator<MarkedContent> markedContentIterator = markedContentStack.descendingIterator();
|
||||
Iterator<MarkedContent> markedContentIterator = markedContentQueue.descendingIterator();
|
||||
while (markedContentIterator.hasNext()) {
|
||||
var markedContent = markedContentIterator.next();
|
||||
if (markedContent.name().equals(name)) {
|
||||
@ -46,10 +46,10 @@ public class MarkedContentStack {
|
||||
|
||||
public boolean currentMarkedContentContainsAny(Set<String> names) {
|
||||
|
||||
if (markedContentStack.isEmpty()) {
|
||||
if (markedContentQueue.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
Iterator<MarkedContent> markedContentIterator = markedContentStack.descendingIterator();
|
||||
Iterator<MarkedContent> markedContentIterator = markedContentQueue.descendingIterator();
|
||||
while (markedContentIterator.hasNext()) {
|
||||
var markedContent = markedContentIterator.next();
|
||||
if (names.contains(markedContent.name())) {
|
||||
@ -62,7 +62,7 @@ public class MarkedContentStack {
|
||||
|
||||
public void clear() {
|
||||
|
||||
markedContentStack.clear();
|
||||
markedContentQueue.clear();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -43,18 +43,20 @@ public class OCGWatermarkRemovalService {
|
||||
@SneakyThrows
|
||||
private void removeOCGWatermarks(PDFDoc pdfDoc) {
|
||||
|
||||
ElementReader reader = new ElementReader();
|
||||
ElementWriter writer = new ElementWriter();
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
try(ElementReader reader = new ElementReader()) {
|
||||
try(ElementWriter writer = new ElementWriter()) {
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
|
||||
Page page = iterator.next();
|
||||
writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
|
||||
Page page = iterator.next();
|
||||
writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
|
||||
}
|
||||
|
||||
reader.destroy();
|
||||
writer.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
reader.destroy();
|
||||
writer.destroy();
|
||||
}
|
||||
|
||||
|
||||
@ -124,17 +126,18 @@ public class OCGWatermarkRemovalService {
|
||||
visitedXObjIds.add(element.getXObject().getObjNum());
|
||||
// writer needs to be newly initialized when entering a new content stream
|
||||
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
|
||||
ElementWriter formWriter = new ElementWriter();
|
||||
reader.formBegin();
|
||||
formWriter.begin(element.getXObject());
|
||||
try(ElementWriter formWriter = new ElementWriter()) {
|
||||
reader.formBegin();
|
||||
formWriter.begin(element.getXObject());
|
||||
|
||||
reader.clearChangeList();
|
||||
formWriter.setDefaultGState(reader);
|
||||
reader.clearChangeList();
|
||||
formWriter.setDefaultGState(reader);
|
||||
|
||||
processElements(page, reader, formWriter, visitedXObjIds);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
reader.end();
|
||||
processElements(page, reader, formWriter, visitedXObjIds);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
reader.end();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -16,20 +16,21 @@ import lombok.experimental.UtilityClass;
|
||||
@UtilityClass
|
||||
public class PdfTextExtraction {
|
||||
|
||||
private static String execute(PDFDoc pdfDoc) throws IOException, PDFNetException{
|
||||
TextExtractor extractor = new TextExtractor();
|
||||
List<String> texts = new ArrayList<>();
|
||||
private static String execute(PDFDoc pdfDoc) throws PDFNetException{
|
||||
try(TextExtractor extractor = new TextExtractor()) {
|
||||
List<String> texts = new ArrayList<>();
|
||||
|
||||
PageIterator iterator = pdfDoc.getPageIterator();
|
||||
while (iterator.hasNext()) {
|
||||
Page page = iterator.next();
|
||||
extractor.begin(page);
|
||||
texts.add(extractor.getAsText());
|
||||
PageIterator iterator = pdfDoc.getPageIterator();
|
||||
while (iterator.hasNext()) {
|
||||
Page page = iterator.next();
|
||||
extractor.begin(page);
|
||||
texts.add(extractor.getAsText());
|
||||
}
|
||||
|
||||
extractor.destroy();
|
||||
pdfDoc.close();
|
||||
return String.join("\n", texts);
|
||||
}
|
||||
|
||||
extractor.destroy();
|
||||
pdfDoc.close();
|
||||
return String.join("\n", texts);
|
||||
}
|
||||
|
||||
public static String extractAllTextFromDocument(InputStream fileStream) throws IOException, PDFNetException {
|
||||
|
||||
@ -93,27 +93,28 @@ public class WatermarkRemovalService {
|
||||
Map<Long, List<ElementFeatures>> formObjectsAndImagesForPages = new HashMap<>();
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
|
||||
ElementReader reader = new ElementReader();
|
||||
try(ElementReader reader = new ElementReader()) {
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
|
||||
Page page = iterator.next();
|
||||
Page page = iterator.next();
|
||||
|
||||
double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
|
||||
double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
|
||||
|
||||
LinkedList<ElementFeatures> elementFeaturesLinkedList = new LinkedList<>();
|
||||
LinkedList<ElementFeatures> elementFeaturesLinkedList = new LinkedList<>();
|
||||
|
||||
reader.begin(page);
|
||||
for (Element element = reader.next(); element != null; element = reader.next()) {
|
||||
processElement(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringFromPage, page);
|
||||
reader.begin(page);
|
||||
for (Element element = reader.next(); element != null; element = reader.next()) {
|
||||
processElement(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringFromPage, page);
|
||||
}
|
||||
|
||||
formObjectsAndImagesForPages.put(page.getSDFObj().getObjNum(), elementFeaturesLinkedList);
|
||||
}
|
||||
|
||||
formObjectsAndImagesForPages.put(page.getSDFObj().getObjNum(), elementFeaturesLinkedList);
|
||||
reader.destroy();
|
||||
|
||||
return formObjectsAndImagesForPages;
|
||||
}
|
||||
|
||||
reader.destroy();
|
||||
|
||||
return formObjectsAndImagesForPages;
|
||||
}
|
||||
|
||||
|
||||
@ -220,13 +221,14 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
if (visitedXObjIds.add(element.getXObject().getObjNum())) {
|
||||
ElementReader xObjectReader = new ElementReader();
|
||||
xObjectReader.begin(element.getXObject());
|
||||
for (Element element1 = xObjectReader.next(); element1 != null; element1 = xObjectReader.next()) {
|
||||
processElement(element1, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
|
||||
try(ElementReader xObjectReader = new ElementReader()) {
|
||||
xObjectReader.begin(element.getXObject());
|
||||
for (Element element1 = xObjectReader.next(); element1 != null; element1 = xObjectReader.next()) {
|
||||
processElement(element1, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
|
||||
}
|
||||
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
|
||||
xObjectReader.destroy();
|
||||
}
|
||||
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
|
||||
xObjectReader.destroy();
|
||||
} else {
|
||||
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
|
||||
}
|
||||
@ -256,21 +258,22 @@ public class WatermarkRemovalService {
|
||||
@SneakyThrows
|
||||
private void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
ElementReader reader = new ElementReader();
|
||||
ElementWriter writer = new ElementWriter();
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
try(ElementReader reader = new ElementReader()) {
|
||||
try (ElementWriter writer = new ElementWriter()) {
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
|
||||
Page page = iterator.next();
|
||||
Page page = iterator.next();
|
||||
|
||||
writeAllElementsExceptWatermarks(page, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
|
||||
writeAllElementsExceptWatermarks(page, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
|
||||
|
||||
}
|
||||
|
||||
reader.destroy();
|
||||
writer.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
reader.destroy();
|
||||
writer.destroy();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -390,17 +393,18 @@ public class WatermarkRemovalService {
|
||||
visitedXObjIds.add(element.getXObject().getObjNum());
|
||||
// writer needs to be newly initialized when entering a new content stream
|
||||
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
|
||||
ElementWriter formWriter = new ElementWriter();
|
||||
reader.formBegin();
|
||||
formWriter.begin(element.getXObject());
|
||||
try(ElementWriter formWriter = new ElementWriter()) {
|
||||
reader.formBegin();
|
||||
formWriter.begin(element.getXObject());
|
||||
|
||||
reader.clearChangeList();
|
||||
formWriter.setDefaultGState(reader);
|
||||
reader.clearChangeList();
|
||||
formWriter.setDefaultGState(reader);
|
||||
|
||||
processElements(page, reader, formWriter, watermarksElementFeaturesList, visitedXObjIds);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
reader.end();
|
||||
processElements(page, reader, formWriter, watermarksElementFeaturesList, visitedXObjIds);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
reader.end();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@ import com.pdftron.pdf.PDFNet;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@SuppressWarnings("PMD")
|
||||
class InvisibleElementRemovalServiceTest {
|
||||
|
||||
InvisibleElementRemovalService invisibleElementRemovalService;
|
||||
|
||||
@ -13,6 +13,7 @@ import com.pdftron.pdf.PDFNet;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Disabled
|
||||
@SuppressWarnings("PMD")
|
||||
class WatermarkRemovalServiceTest {
|
||||
|
||||
@SneakyThrows
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user