diff --git a/bamboo-specs/pom.xml b/bamboo-specs/pom.xml
new file mode 100644
index 0000000..101e4ba
--- /dev/null
+++ b/bamboo-specs/pom.xml
@@ -0,0 +1,36 @@
+
+ 4.0.0
+
+
+ com.atlassian.bamboo
+ bamboo-specs-parent
+ 8.1.3
+
+
+
+ bamboo-specs
+ 1.0.0-SNAPSHOT
+ jar
+
+
+
+ com.atlassian.bamboo
+ bamboo-specs-api
+
+
+ com.atlassian.bamboo
+ bamboo-specs
+
+
+
+
+ junit
+ junit
+ test
+
+
+
+
+
+
\ No newline at end of file
diff --git a/bamboo-specs/src/main/java/buildjob/PlanSpec.java b/bamboo-specs/src/main/java/buildjob/PlanSpec.java
new file mode 100644
index 0000000..9531f57
--- /dev/null
+++ b/bamboo-specs/src/main/java/buildjob/PlanSpec.java
@@ -0,0 +1,149 @@
+package buildjob;
+
+import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
+
+import com.atlassian.bamboo.specs.api.BambooSpec;
+import com.atlassian.bamboo.specs.api.builders.BambooKey;
+import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
+import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
+import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
+import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
+import com.atlassian.bamboo.specs.api.builders.plan.Job;
+import com.atlassian.bamboo.specs.api.builders.plan.Plan;
+import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
+import com.atlassian.bamboo.specs.api.builders.plan.Stage;
+import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
+import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
+import com.atlassian.bamboo.specs.api.builders.project.Project;
+import com.atlassian.bamboo.specs.api.builders.Variable;
+import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
+import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
+import com.atlassian.bamboo.specs.builders.task.ScriptTask;
+import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
+import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
+import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
+import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
+import com.atlassian.bamboo.specs.util.BambooServer;
+import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
+
+import java.util.Locale;
+
+/**
+ * Plan configuration for Bamboo.
+ * Learn more on: https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs
+ */
+@BambooSpec
+public class PlanSpec {
+
+    /** Service name; used as plan name, script argument and linked repository suffix. */
+    private static final String SERVICE_NAME = "pdftron-logic-commons";
+
+    /**
+     * Plan key derived from the service name: upper-cased with the dashes removed.
+     * Locale.ROOT keeps the result independent of the JVM default locale.
+     */
+    private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase(Locale.ROOT).replace("-", "");
+
+    /**
+     * Run main to publish plan on Bamboo
+     */
+    public static void main(final String[] args) throws Exception {
+        //By default credentials are read from the '.credentials' file.
+        BambooServer bambooServer = new BambooServer("http://localhost:8085");
+
+        // One spec instance builds both the plan and its permissions
+        PlanSpec planSpec = new PlanSpec();
+        Plan plan = planSpec.createPlan();
+        bambooServer.publish(plan);
+        PlanPermissions planPermission = planSpec.createPlanPermission(plan.getIdentifier());
+        bambooServer.publish(planPermission);
+    }
+
+    /**
+     * Builds the permissions that are published for the given plan.
+     *
+     * @param planIdentifier supplies the project key and plan key of the plan
+     * @return permissions for the users and groups configured below
+     */
+    private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
+        Permissions permission = new Permissions()
+                .userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
+                .groupPermissions("development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
+                .groupPermissions("devplant", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
+                .loggedInUserPermissions(PermissionType.VIEW)
+                .anonymousUserPermissionView();
+        return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
+    }
+
+    /** Returns the Bamboo project ("RED") the plan belongs to. */
+    private Project project() {
+        return new Project()
+                .name("RED")
+                .key(new BambooKey("RED"));
+    }
+
+    /**
+     * Defines the plan: one stage with one job that cleans the working directory, checks out the default repository,
+     * runs the build script, parses JUnit results, runs the Sonar script, injects git.tag and tags the repository.
+     *
+     * @return the plan definition
+     */
+    public Plan createPlan() {
+        return new Plan(
+                project(),
+                SERVICE_NAME, new BambooKey(SERVICE_KEY))
+                .description("Plan created from (enter repository url of your plan)")
+                .variables(new Variable("maven_add_param", ""))
+                .stages(new Stage("Default Stage")
+                        .jobs(new Job("Default Job",
+                                new BambooKey("JOB1"))
+                                .tasks(
+                                        new ScriptTask()
+                                                .description("Clean")
+                                                .inlineBody("#!/bin/bash\n" +
+                                                        "set -e\n" +
+                                                        "rm -rf ./*"),
+                                        new VcsCheckoutTask()
+                                                .description("Checkout Default Repository")
+                                                .checkoutItems(new CheckoutItem().defaultRepository()),
+                                        new ScriptTask()
+                                                .description("Build")
+                                                .location(Location.FILE)
+                                                .fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
+                                                .argument(SERVICE_NAME),
+                                        createJUnitParserTask()
+                                                .description("Resultparser")
+                                                .resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
+                                                .enabled(true),
+                                        new ScriptTask()
+                                                .description("Sonar")
+                                                .location(Location.FILE)
+                                                .fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh")
+                                                .argument(SERVICE_NAME),
+                                        new InjectVariablesTask()
+                                                .description("Inject git Tag")
+                                                .path("git.tag")
+                                                .namespace("g")
+                                                .scope(InjectVariablesScope.LOCAL),
+                                        new VcsTagTask()
+                                                .description("${bamboo.g.gitTag}")
+                                                .tagName("${bamboo.g.gitTag}")
+                                                .defaultRepository())
+                                .dockerConfiguration(
+                                        new DockerConfiguration()
+                                                .image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
+                                                .volume("/etc/maven/settings.xml", "/usr/share/maven/conf/settings.xml")
+                                                .dockerRunArguments("--net=host")
+                                                .volume("/var/run/docker.sock", "/var/run/docker.sock")
+                                )
+                        )
+                )
+                .linkedRepositories("RED / " + SERVICE_NAME)
+                .triggers(new BitbucketServerTrigger())
+                .planBranchManagement(new PlanBranchManagement()
+                        .createForVcsBranch()
+                        .delete(new BranchCleanup()
+                                .whenInactiveInRepositoryAfterDays(14))
+                        .notificationForCommitters());
+    }
+}
diff --git a/bamboo-specs/src/main/resources/scripts/build-java.sh b/bamboo-specs/src/main/resources/scripts/build-java.sh
new
file mode 100644
index 0000000..901e3ee
--- /dev/null
+++ b/bamboo-specs/src/main/resources/scripts/build-java.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Builds the service with Maven and writes the resulting tag name to git.tag.
+#   master, release* branches and explicit version tags: set the new version and deploy.
+#   otherwise (dev build): "clean install" only, tagged <branch>_<buildNumber>.
+# Usage: build-java.sh <service-name>
+set -e
+
+SERVICE_NAME=$1
+
+if [[ "$bamboo_planRepository_branchName" == "master" ]]
+then
+    branchVersion=$(cat pom.xml | grep -Eo " .*-SNAPSHOT" | sed -s 's|\(.*\)\..*\(-*.*\)|\1|' | tr -d ' ')
+    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
+    newVersion="$(semver $latestVersion -p -i minor)"
+    echo "new release on master with version $newVersion"
+elif [[ "$bamboo_planRepository_branchName" == release* ]]
+then
+    branchVersion=$(echo $bamboo_planRepository_branchName | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
+    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
+    newVersion="$(semver $latestVersion -p -i patch)"
+    echo "new release on $bamboo_planRepository_branchName with version $newVersion"
+elif [[ "${bamboo_version_tag}" != "dev" ]]
+then
+    newVersion="${bamboo_version_tag}"
+    echo "new special version bild with $newVersion"
+else
+    mvn -f ${bamboo_build_working_directory}/pom.xml \
+        --no-transfer-progress \
+        ${bamboo_maven_add_param} \
+        clean install \
+        -Djava.security.egd=file:/dev/./urandom
+    echo "dev build with tag ${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
+    echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
+    exit 0
+fi
+
+echo "gitTag=${newVersion}" > git.tag
+
+mvn --no-transfer-progress \
+    -f ${bamboo_build_working_directory}/pom.xml \
+    ${bamboo_maven_add_param} \
+    versions:set \
+    -DnewVersion=${newVersion}
+
+mvn -f ${bamboo_build_working_directory}/pom.xml \
+    --no-transfer-progress \
+    clean deploy \
+    ${bamboo_maven_add_param} \
+    -e \
+    -DdeployAtEnd=true \
+    -Dmaven.wagon.http.ssl.insecure=true \
+    -Dmaven.wagon.http.ssl.allowall=true \
+    -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
+    -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases
+
+
diff --git a/bamboo-specs/src/main/resources/scripts/sonar-java.sh b/bamboo-specs/src/main/resources/scripts/sonar-java.sh
new file mode 100644
index 0000000..5ef675a
--- /dev/null
+++ b/bamboo-specs/src/main/resources/scripts/sonar-java.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# Runs the OWASP dependency check, then a Sonar scan: branch analysis when no pull-request key is set, pull-request analysis otherwise.
+# Usage: sonar-java.sh <service-name>
+set -e
+
+SERVICE_NAME=$1
+
+echo "dependency-check:aggregate"
+mvn --no-transfer-progress \
+    org.owasp:dependency-check-maven:aggregate \
+    -DknownExploitedEnabled=false
+
+if [[ -z "${bamboo_repository_pr_key}" ]]
+then
+    echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
+    mvn --no-transfer-progress \
+        sonar:sonar \
+        -Dsonar.projectKey=RED_$SERVICE_NAME \
+        -Dsonar.host.url=https://sonarqube.iqser.com \
+        -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
+        -Dsonar.branch.name=${bamboo_planRepository_1_branch} \
+        -Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \
+        -Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \
+        -Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html
+
+else
+    echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
+    mvn --no-transfer-progress \
+        sonar:sonar \
+        -Dsonar.projectKey=RED_$SERVICE_NAME \
+        -Dsonar.host.url=https://sonarqube.iqser.com \
+        -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
+        -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
+        -Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
+        -Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
+        -Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \
+        -Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \
+        -Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html
+fi
diff --git a/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java b/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
new file mode 100644
index
0000000..d53aec1
--- /dev/null
+++ b/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
@@ -0,0 +1,17 @@
+package buildjob;
+
+
+import com.atlassian.bamboo.specs.api.builders.plan.Plan;
+import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
+import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
+import org.junit.Test;
+
+/** Builds the entity properties of the plan; the test fails if a PropertiesValidationException is thrown. */
+public class PlanSpecTest {
+    @Test
+    public void checkYourPlanOffline() throws PropertiesValidationException {
+        Plan plan = new PlanSpec().createPlan();
+
+        EntityPropertiesBuilders.build(plan);
+    }
+}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..73c1ece
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,105 @@
+
+
+
+
+ platform-dependency
+ com.iqser.red
+ 1.17.0
+
+
+ 4.0.0
+
+
+ UTF-8
+
+
+ pdftron-logic-commons
+ com.iqser.red.commons
+ 1.0-SNAPSHOT
+
+
+
+
+ org.slf4j
+ slf4j-api
+ provided
+
+
+ com.google.guava
+ guava
+
+
+
+ org.springframework.boot
+ spring-boot-starter
+ provided
+
+
+
+ org.springframework.boot
+ spring-boot-starter-web
+ provided
+
+
+
+ org.springframework.boot
+ spring-boot-configuration-processor
+ provided
+
+
+ com.pdftron
+ PDFNet
+ 9.4.0
+ provided
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+
+ test-jar
+
+
+
+
+
+ org.jacoco
+ jacoco-maven-plugin
+
+
+ prepare-agent
+
+ prepare-agent
+
+
+
+ report
+
+ report
+
+
+
+
+
+
+
+
+
+
+
+ pdftron
+ PDFNet Maven
+ https://pdftron.com/maven/release
+
+
+
+
diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/ClippingPathStack.java b/src/main/java/com/iqser/red/pdftronlogic/commons/ClippingPathStack.java
new file mode 100644
index 0000000..5d8f8c0
--- /dev/null
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/ClippingPathStack.java
@@ -0,0 +1,102 @@
+package com.iqser.red.pdftronlogic.commons;
+
+import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
+
+import java.awt.geom.Area;
+import java.awt.geom.GeneralPath;
+import java.awt.geom.Rectangle2D;
+import java.util.Deque;
+import java.util.LinkedList;
+
+import com.pdftron.pdf.Rect;
+
+import lombok.Data;
+import lombok.SneakyThrows;
+
+/**
+ * Stack of clipping areas; the top entry is the clipping area currently in effect.
+ * An entry is pushed when a graphics state is entered and popped when it is left.
+ */
+@Data
+public class ClippingPathStack {
+
+    private Deque<Area> stack = new LinkedList<>();
+
+
+    /**
+     * Creates a stack whose only entry is the area of the given rectangle.
+     *
+     * @param rectangle initial clipping rectangle
+     */
+    @SneakyThrows
+    public ClippingPathStack(Rect rectangle) {
+
+        stack.push(new Area(new Rectangle2D.Double(rectangle.getX1(), rectangle.getY1(), rectangle.getWidth(), rectangle.getHeight()).getBounds2D()));
+    }
+
+
+    /**
+     * Replaces the current clipping area by its intersection with the given path.
+     *
+     * @param path path to intersect with the current clipping area
+     */
+    @SneakyThrows
+    public void intersectClippingPath(GeneralPath path) {
+
+        getCurrentClippingPath().intersect(new Area(path));
+    }
+
+
+    /**
+     * Checks whether the given rectangle, grown by TOLERANCE on every side, intersects the current clipping area.
+     *
+     * @param x      x coordinate of the rectangle's origin
+     * @param y      y coordinate of the rectangle's origin
+     * @param width  width of the rectangle
+     * @param height height of the rectangle
+     * @return true if the grown rectangle intersects the current clipping area
+     */
+    public boolean almostIntersects(double x, double y, double width, double height) {
+        // To address inconsistencies in the calculation of the bounding box we slightly increase the rectangle
+        // Height or width are zero for straight lines, even though they are being rendered. Therefore, height or width must be at minimum >0.
+        // The origin always moves by -TOLERANCE: adding TOLERANCE for non-positive coordinates would shift the rectangle instead of growing it.
+
+        double xWithTolerance = x - TOLERANCE;
+        double yWithTolerance = y - TOLERANCE;
+        double widthWithTolerance = width + (2 * TOLERANCE);
+        double heightWithTolerance = height + (2 * TOLERANCE);
+        return getCurrentClippingPath().intersects(xWithTolerance, yWithTolerance, widthWithTolerance, heightWithTolerance);
+    }
+
+
+    /**
+     * @return the clipping area on top of the stack, or null if the stack is empty
+     */
+    public Area getCurrentClippingPath() {
+
+        return stack.peek();
+    }
+
+
+    /**
+     * Pushes a copy of the current clipping area, so that changes made inside the new graphics state
+     * do not affect the enclosing one.
+     */
+    public void enterNewGState() {
+
+        Area current = stack.peek();
+        Area cloned = new Area();
+        cloned.add(current);
+        stack.push(cloned);
+    }
+
+
+    /**
+     * Drops the clipping area of the graphics state that is being left.
+     */
+    public void leaveGState() {
+
+        stack.pop();
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java b/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java
new file mode 100644
index 0000000..4114d96
--- /dev/null
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java
@@ -0,0 +1,198 @@
+package com.iqser.red.pdftronlogic.commons;
+
+import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
+
+import java.awt.geom.Rectangle2D;
+
+import com.pdftron.common.PDFNetException;
+import com.pdftron.pdf.Element;
+import com.pdftron.pdf.Rect;
+
+import lombok.AccessLevel;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.SneakyThrows;
+import lombok.experimental.FieldDefaults;
+import lombok.experimental.SuperBuilder;
+
+/**
+ * Snapshot of the properties of a PDFTron Element that are used to recognise the same element again
+ * when the content stream is read a second time. Type-specific properties live in the nested subclasses.
+ */
+@Getter
+@SuperBuilder
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class ElementFeatures {
+
+    int elementType;
+    Rectangle2D boundingBox;
+
+
+    /**
+     * Checks whether the given element has the same type and, within TOLERANCE, the same bounding box.
+     * Subclasses additionally compare their type-specific properties.
+     *
+     * @param element element to compare against this snapshot
+     * @return true if the element matches; false if it has no bounding box
+     * @throws PDFNetException if reading a property of the element fails
+     */
+    public boolean almostMatches(Element element) throws PDFNetException {
+
+        if (element.getType() != elementType) {
+            return false;
+        }
+        // Read the bounding box once instead of once for the null check and once for the comparison
+        Rect bBox = element.getBBox();
+        return bBox != null && rectsAlmostMatch(bBox);
+    }
+
+
+    /** Returns true if the two values differ by less than TOLERANCE. */
+    protected boolean almostEqual(double a, double b) {
+
+        return Math.abs(a - b) < TOLERANCE;
+    }
+
+
+    @SneakyThrows
+    private boolean rectsAlmostMatch(Rect bBox) {
+        // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
+
+        return almostEqual(bBox.getX1(), boundingBox.getX()) && //
+                almostEqual(bBox.getY1(), boundingBox.getY()) && //
+                almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
+                almostEqual(bBox.getHeight(), boundingBox.getHeight());
+    }
+
+
+    /** Features of a text element: the text string, the font type and the font size. */
+    @EqualsAndHashCode(callSuper = true)
+    @Getter
+    @SuperBuilder
+    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+    private static class Text extends ElementFeatures {
+
+        String text;
+        int font;
+        double fontsize;
+
+
+        @Override
+        public boolean almostMatches(Element element) throws PDFNetException {
+
+            return super.almostMatches(element) && //
+                    text.equals(element.getTextString()) && //
+                    font == element.getGState().getFont().getType() && //
+                    almostEqual(fontsize, element.getGState().getFontSize());
+        }
+
+    }
+
+    /** Features of a path element: its clipping, stroke and fill flags. */
+    @EqualsAndHashCode(callSuper = true)
+    @Getter
+    @SuperBuilder
+    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+    private static class Path extends ElementFeatures {
+
+        boolean isClippingPath;
+        boolean isClipWindingFill;
+        boolean isStroked;
+        boolean isFilled;
+        boolean isWindingFill;
+
+
+        @Override
+        public boolean almostMatches(Element element) throws PDFNetException {
+
+            return super.almostMatches(element) && //
+                    isClippingPath == element.isClippingPath() && //
+                    isClipWindingFill == element.isClipWindingFill() && //
+                    isStroked == element.isStroked() && //
+                    isFilled == element.isFilled() && //
+                    isWindingFill == element.isWindingFill();
+
+        }
+
+    }
+
+    /** Features of an image or inline image element: data size, dimensions, rendering intent, component count and bits per component. */
+    @EqualsAndHashCode(callSuper = true)
+    @Getter
+    @SuperBuilder
+    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+    private static class Image extends ElementFeatures {
+
+        int dataSize;
+        int height;
+        int width;
+        int renderingIntent;
+        int componentNum;
+        int bitsPerComponent;
+
+
+        @Override
+        public boolean almostMatches(Element element) throws PDFNetException {
+
+            return super.almostMatches(element) && //
+                    dataSize == element.getImageDataSize() && //
+                    height == element.getImageHeight() && //
+                    width == element.getImageWidth() && //
+                    renderingIntent == element.getImageRenderingIntent() && //
+                    componentNum == element.getComponentNum() && //
+                    bitsPerComponent == element.getBitsPerComponent();
+        }
+
+    }
+
+
+    /**
+     * Creates the feature snapshot for a path, text, image or inline image element.
+     *
+     * @param element element to extract the features from
+     * @return the features of the element
+     * @throws PDFNetException  if reading a property of the element fails
+     * @throws RuntimeException if the element is of any other type
+     */
+    public static ElementFeatures extractFeatures(Element element) throws PDFNetException {
+
+        return switch (element.getType()) {
+            case Element.e_path -> Path.builder()
+                    .elementType(element.getType())
+                    .boundingBox(toRectangle2D(element.getBBox()))
+                    .isClippingPath(element.isClippingPath())
+                    .isClipWindingFill(element.isClipWindingFill())
+                    .isStroked(element.isStroked())
+                    .isFilled(element.isFilled())
+                    .isWindingFill(element.isWindingFill())
+                    .build();
+            case Element.e_text -> Text.builder()
+                    .elementType(element.getType())
+                    .boundingBox(toRectangle2D(element.getBBox()))
+                    .text(element.getTextString())
+                    .font(element.getGState().getFont().getType())
+                    .fontsize(element.getGState().getFontSize())
+                    .build();
+            case Element.e_image, Element.e_inline_image -> Image.builder()
+                    .elementType(element.getType())
+                    .boundingBox(toRectangle2D(element.getBBox()))
+                    .dataSize(element.getImageDataSize())
+                    .height(element.getImageHeight())
+                    .width(element.getImageWidth())
+                    .renderingIntent(element.getImageRenderingIntent())
+                    .componentNum(element.getComponentNum())
+                    .bitsPerComponent(element.getBitsPerComponent())
+                    .build();
+            // This technically should never happen, it's a safetynet
+            default -> throw new RuntimeException("Feature Extraction is not supported for PDFTron.Element with type: " + element.getType());
+        };
+    }
+
+
+    /** Converts a PDFTron Rect into a Rectangle2D with the same origin, width and height. */
+    private static Rectangle2D toRectangle2D(Rect rect) throws PDFNetException {
+
+        return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
+    }
+
+}
diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java
new file mode 100644
index 0000000..d0345ad
--- /dev/null
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java
@@ -0,0 +1,524 @@
+package com.iqser.red.pdftronlogic.commons;
+
+import java.awt.Shape;
+import java.awt.geom.AffineTransform;
+import java.awt.geom.GeneralPath;
+import java.awt.geom.Rectangle2D;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.springframework.stereotype.Service;
+
+import com.google.common.primitives.Bytes;
+import com.google.common.primitives.Doubles;
+import com.pdftron.common.Matrix2D;
+import com.pdftron.common.PDFNetException;
+import com.pdftron.pdf.ColorPt;
+import com.pdftron.pdf.ColorSpace;
+import com.pdftron.pdf.Element;
+import com.pdftron.pdf.ElementBuilder;
+import com.pdftron.pdf.ElementReader;
+import com.pdftron.pdf.ElementWriter;
+import com.pdftron.pdf.GState;
+import com.pdftron.pdf.PDFDoc;
+import com.pdftron.pdf.Page;
+import com.pdftron.pdf.PageIterator;
+import com.pdftron.pdf.PathData;
+import com.pdftron.pdf.Rect;
+import com.pdftron.sdf.Obj;
+import com.pdftron.sdf.SDFDoc;
+
+import lombok.Builder;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+public class InvisibleElementRemovalService {
+
+    /** Tolerance used when bounding boxes are compared, grown or shrunk. */
+    public static final double TOLERANCE = 1e-3;
+
+
+    /**
+     * Removes all hidden Text, Path and Image Elements from a PDF Document.
+     * handled cases:
+     * -Text which is transparent or is set to not render
+     * -Elements outside of clipping path
+     * -Elements that have been painted over by visible and filled Paths
+     * unhandled cases:
+     * -Elements covered by widely stroked path
+     * -Elements with the same color as background
+     * -Any Text set to clipping with its many interactions with other elements
+     *
+     * @param pdfFile The PDF file to process
+     * @param out     OutputStream to write the resulting file to
+     * @param delta   If this flag is set only the removed Elements will be written to the output file.
+     *                The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap.
+     **/
+    @SneakyThrows
+    public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) {
+
+        PDFDoc pdfDoc = new PDFDoc(pdfFile);
+        ElementWriter writer = new ElementWriter();
+        ElementReader reader = new ElementReader();
+
+        try {
+            Set<Long> visitedXObjIds = new TreeSet<>();
+
+            for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
+
+                Page page = iterator.next();
+
+                visitedXObjIds.add(page.getSDFObj().getObjNum());
+
+                InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
+                        .reader(reader)
+                        .clippingPathStack(new ClippingPathStack(page.getMediaBox()))
+                        .delta(delta)
+                        .overlappedElements(new ArrayList<>())
+                        .visibleElements(new ArrayList<>())
+                        .visitedXObjIds(visitedXObjIds)
+                        .build();
+
+                removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context);
+
+                // The second pass has to enter the same form XObjects again
+                context.visitedXObjIds().clear();
+
+                removeOverlappedElements(page, writer, context);
+            }
+
+            try {
+                pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
+            } catch (Exception e) {
+                log.error("File could not be saved after invisible element removal", e);
+                throw new RuntimeException(e);
+            }
+        } finally {
+            // Release the PDFTron resources even if processing or saving failed
+            writer.destroy();
+            reader.destroy();
+            pdfDoc.close();
+        }
+    }
+
+
+    /**
+     * First pass over a page: replaces the page content with the elements that survive the clipping and text
+     * visibility checks and records the elements that are covered by filled paths.
+     */
+    private void removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(Page page,
+                                                                                    ElementWriter writer,
+                                                                                    InvisibleElementRemovalContext context) throws PDFNetException {
+
+        context.reader().begin(page);
+        writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
+        processElements(writer, context);
+        writer.end();
+        context.reader().end();
+    }
+
+
+    /** Routes every element of the content stream the reader is positioned in to the handler for its type. */
+    private void processElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        for (Element element = context.reader().next(); element != null; element = context.reader().next()) {
+            switch (element.getType()) {
+                case Element.e_image, Element.e_inline_image -> processImages(element, writer, context);
+                case Element.e_text -> processText(element, writer, context);
+                case Element.e_path -> processPath(element, writer, context);
+                case Element.e_form -> processForm(element, writer, context);
+                case Element.e_group_begin -> {
+                    context.clippingPathStack().enterNewGState();
+                    writer.writeElement(element);
+                }
+                case Element.e_group_end -> {
+                    context.clippingPathStack().leaveGState();
+                    writer.writeElement(element);
+                }
+                default -> writer.writeElement(element);
+            }
+        }
+    }
+
+
+    /**
+     * Writes an image only if it intersects the current clipping area (in delta mode: only if it does not).
+     * Images kept in normal mode are remembered as visible elements.
+     */
+    private void processImages(Element imageElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        Rect rect = imageElement.getBBox();
+
+        if (rect == null) {
+            // NOTE(review): an image without bounding box is dropped, while processText writes such elements - confirm this is intended
+            return;
+        }
+
+        boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
+
+        if (!context.delta() && inClippingPath) {
+            context.visibleElements().add(ElementFeatures.extractFeatures(imageElement));
+        }
+
+        if (context.delta() ^ inClippingPath) {
+            writer.writeElement(imageElement);
+        }
+    }
+
+
+    /**
+     * Writes a text element only if it intersects the current clipping area and is rendered visibly.
+     * In delta mode the removed text is written instead, coloured red (clipped) or blue (not visible).
+     */
+    private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        Rect rect = textElement.getBBox();
+
+        if (rect == null) {
+            writer.writeElement(textElement);
+            return;
+        }
+
+        GState gState = textElement.getGState();
+
+        boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
+
+        boolean isTextVisible = isTextRenderedVisibly(gState);
+
+        if (inClippingPath && isTextVisible) {
+            context.visibleElements().add(ElementFeatures.extractFeatures(textElement));
+        }
+        if (!context.delta()) {
+            if (inClippingPath && isTextVisible) {
+                writer.writeElement(textElement);
+            } else if (textElement.hasTextMatrix()) {
+                /*
+                 PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
+                 hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
+                 Therefore, the position of a following Tj is affected by not writing the first Element.
+                 This is why, we write only the Tm command:
+                 */
+                writer.writeGStateChanges(textElement);
+            }
+        } else {
+            if (!inClippingPath) {
+                gState.setFillColorSpace(ColorSpace.createDeviceRGB());
+                // red for elements removed by clipping path
+                gState.setFillColor(new ColorPt(1, 0, 0));
+                writer.writeElement(textElement);
+            }
+            if (!isTextVisible) {
+                gState.setFillColorSpace(ColorSpace.createDeviceRGB());
+                // blue for elements removed due to transparency or not rendered
+                gState.setFillColor(new ColorPt(0, 0, 1));
+                gState.setTextRenderMode(GState.e_fill_text);
+                gState.setFillOpacity(1);
+                writer.writeElement(textElement);
+            }
+        }
+    }
+
+
+    /**
+     * Writes the form element and, unless its XObject was already visited, processes the form's own content
+     * stream with a separate writer.
+     */
+    private void processForm(Element formElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        writer.writeElement(formElement);
+        Obj formObj = formElement.getXObject();
+
+        if (!context.visitedXObjIds().contains(formObj.getObjNum())) {
+            context.visitedXObjIds().add(formObj.getObjNum());
+            // writer needs to be newly initialized when entering a new content stream
+            // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
+            ElementWriter formWriter = new ElementWriter();
+            context.reader().formBegin();
+            formWriter.begin(formObj);
+
+            context.reader().clearChangeList();
+            formWriter.setDefaultGState(context.reader());
+
+            processElements(formWriter, context);
+            formWriter.end();
+            formWriter.destroy();
+            context.reader().end();
+        }
+    }
+
+
+    /**
+     * Handles a path element: clipping paths narrow the current clipping area, other paths are written if they
+     * intersect it. A filled, non-transparent path moves the visible elements it covers to the overlapped list.
+     */
+    private void processPath(Element pathElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        PathData pathData = pathElement.getPathData();
+
+        if (pathData.getOperators().length == 0 && pathData.getPoints().length == 0) {
+            writer.writeGStateChanges(pathElement);
+            return;
+        }
+
+        GeneralPath linePath = convertToGeneralPath(pathData);
+
+        //transform path to initial user space
+        var ctm = pathElement.getCTM();
+        var affineTransform = toAffineTransform(ctm);
+        linePath.transform(affineTransform);
+
+        var rect = linePath.getBounds2D();
+
+        boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX(), rect.getY(), rect.getWidth(), rect.getHeight());
+
+        if (pathElement.isClippingPath()) {
+            if (pathElement.isClipWindingFill()) {
+                linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
+            } else {
+                linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
+            }
+
+            context.clippingPathStack().intersectClippingPath(linePath);
+            pathElement.setPathClip(!context.delta());
+            writer.writeElement(pathElement);
+
+        } else {
+            if (pathElement.isWindingFill()) {
+                linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
+            } else {
+                linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
+            }
+
+            if (inClippingPath) {
+                if (isFilledAndNonTransparent(pathElement)) {
+                    List<ElementFeatures> currentOverlappedElements = context.visibleElements()
+                            .stream()
+                            .filter(features -> almostContains(linePath, features.getBoundingBox()))
+                            .toList();
+                    context.overlappedElements().addAll(currentOverlappedElements);
+                    context.visibleElements().removeAll(currentOverlappedElements);
+                }
+                context.visibleElements().add(ElementFeatures.extractFeatures(pathElement));
+                if (!context.delta()) {
+                    writer.writeElement(pathElement);
+                }
+            }
+            if (context.delta() && !inClippingPath) {
+                pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
+                pathElement.getGState().setFillColor(new ColorPt(1, 0, 0));
+                pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
+                pathElement.getGState().setStrokeColor(new ColorPt(1, 0, 0));
+                writer.writeElement(pathElement);
+            }
+        }
+    }
+
+
+    /**
+     * Second pass over a page: rewrites the content without the elements recorded as overlapped.
+     * In delta mode a green bounding box is drawn for every overlapped element instead and nothing is removed.
+     */
+    private void removeOverlappedElements(Page page, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        context.reader().begin(page);
+        writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
+        if (context.delta()) {
+            // green for element removed due to overlapping
+            context.overlappedElements().forEach(feature -> drawBBox(writer, feature.getBoundingBox(), "#00FF00"));
+            context.overlappedElements().clear();
+        }
+        processOverlappedElements(writer, context);
+        writer.end();
+        context.reader().end();
+
+        if (!context.overlappedElements().isEmpty()) {
+            log.warn("{} overlapped elements have not been found or removed", context.overlappedElements().size());
+        }
+    }
+
+
+    /**
+     * Copies the elements of the current content stream to the writer, skipping every path, image or text
+     * element that matches an entry of the overlapped list; a matched entry is removed from the list.
+     */
+    private void processOverlappedElements(ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        for (Element element = context.reader().next(); element != null; element = context.reader().next()) {
+            switch (element.getType()) {
+                case Element.e_form -> processFormOverlappedElements(writer, element, context);
+                case Element.e_path, Element.e_image, Element.e_inline_image, Element.e_text -> {
+                    boolean anyMatch = false;
+                    for (Iterator<ElementFeatures> candidates = context.overlappedElements().iterator(); candidates.hasNext(); ) {
+                        if (candidates.next().almostMatches(element)) {
+                            candidates.remove();
+                            anyMatch = true;
+                            break;
+                        }
+                    }
+                    if (!anyMatch) {
+                        writer.writeElement(element);
+                    } else if (element.getType() == Element.e_text && element.hasTextMatrix()) {
+                        /*
+                         PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
+                         hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
+                         Therefore, the position of a following Tj is affected by not writing the first Element.
+                         This is why, we write only the Tm command:
+                         */
+                        writer.writeGStateChanges(element);
+                    }
+                }
+                default -> writer.writeElement(element);
+            }
+        }
+    }
+
+
+    /**
+     * Counterpart of processForm for the second pass: writes the form element and, unless its XObject was
+     * already visited, filters the form's own content stream with a separate writer.
+     */
+    private void processFormOverlappedElements(ElementWriter writer, Element formElement, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        writer.writeElement(formElement);
+        Obj formObj = formElement.getXObject();
+
+        if (!context.visitedXObjIds().contains(formObj.getObjNum())) {
+            context.visitedXObjIds().add(formObj.getObjNum());
+            // writer needs to be newly initialized when entering a new content stream
+            // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
+            ElementWriter formWriter = new ElementWriter();
+            context.reader().formBegin();
+            formWriter.begin(formObj);
+
+            context.reader().clearChangeList();
+            formWriter.setDefaultGState(context.reader());
+
+            processOverlappedElements(formWriter, context);
+            formWriter.end();
+            formWriter.destroy();
+            context.reader().end();
+        }
+    }
+
+
+    /**
+     * Text counts as invisible if its render mode is "invisible" or if every part that the render mode
+     * paints (fill, stroke or both) has an opacity of 0.
+     */
+    private boolean isTextRenderedVisibly(GState gState) throws PDFNetException {
+
+        return gState.getTextRenderMode() != GState.e_invisible_text && //
+                !(gState.getTextRenderMode() == GState.e_fill_text && gState.getFillOpacity() == 0) && //
+                !(gState.getTextRenderMode() == GState.e_stroke_text && gState.getStrokeOpacity() == 0) && //
+                !(gState.getTextRenderMode() == GState.e_fill_stroke_text && gState.getFillOpacity() == 0 && gState.getStrokeOpacity() == 0);
+    }
+
+
+    /**
+     * Converts PDFTron path data into a GeneralPath by replaying its operators on the point list.
+     *
+     * @throws PDFNetException if the path data contains an operator that is not handled here
+     */
+    private GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {
+
+        GeneralPath linePath = new GeneralPath();
+        Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
+        Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
+        for (var operator : operators) {
+            switch (operator) {
+                case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
+                case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
+                case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
+                case PathData.e_closepath -> linePath.closePath();
+                case PathData.e_rect -> {
+                    double x = points.next();
+                    double y = points.next();
+                    double w = points.next();
+                    double h = points.next();
+                    linePath.moveTo(x, y);
+                    linePath.lineTo(x + w, y);
+                    linePath.lineTo(x + w, y + h);
+                    linePath.lineTo(x, y + h);
+                    linePath.closePath();
+                }
+                default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
+            }
+        }
+        return linePath;
+    }
+
+
+    /**
+     * Checks whether the outer shape contains the inner rectangle after shrinking it by TOLERANCE on every side.
+     */
+    private boolean almostContains(Shape outer, Rectangle2D inner) {
+        //To address inconsistencies in the calculation of the bounding box we slightly shrink the inner rectangle
+        // The origin always moves by +TOLERANCE: subtracting TOLERANCE for negative coordinates would move the edge outwards instead of inwards
+
+        double xWithTolerance = inner.getX() + TOLERANCE;
+        double yWithTolerance = inner.getY() + TOLERANCE;
+        double heightWithTolerance = inner.getHeight() - (2 * TOLERANCE);
+        double widthWithTolerance = inner.getWidth() - (2 * TOLERANCE);
+        Rectangle2D innerRect = new Rectangle2D.Double(xWithTolerance, yWithTolerance, widthWithTolerance, heightWithTolerance);
+
+        return outer.contains(innerRect);
+    }
+
+
+    /** A path covers other elements only if it is filled and its fill opacity is exactly 1. */
+    private boolean isFilledAndNonTransparent(Element element) throws PDFNetException {
+
+        return element.isFilled() && element.getGState().getFillOpacity() == 1;
+    }
+
+
+    /**
+     * Draws the outline of the given rectangle in the given colour.
+     *
+     * @param hexcolor colour in the form "#RRGGBB"
+     */
+    @SneakyThrows
+    private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {
+
+        ColorPt colorPt = new ColorPt(Integer.parseInt(hexcolor.substring(1, 3), 16) / 255d,
+                Integer.parseInt(hexcolor.substring(3, 5), 16) / 255d,
+                Integer.parseInt(hexcolor.substring(5, 7), 16) / 255d);
+        ElementBuilder eb = new ElementBuilder();
+        Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
+        rect.setPathStroke(true);
+        rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
+        rect.getGState().setStrokeColor(colorPt);
+        writer.writePlacedElement(rect);
+
+        colorPt.destroy();
+        eb.destroy();
+    }
+
+
+    /** Converts a PDFTron matrix into the equivalent AffineTransform. */
+    private static AffineTransform toAffineTransform(Matrix2D ctm) throws PDFNetException {
+
+        return new AffineTransform(ctm.getA(), ctm.getB(), ctm.getC(), ctm.getD(), ctm.getH(), ctm.getV());
+    }
+
+
+    /** State shared by both passes over a page. */
+    @Builder
+    private record InvisibleElementRemovalContext(
+            boolean delta,
+            ElementReader reader,
+            ClippingPathStack clippingPathStack,
+            List<ElementFeatures> overlappedElements,
+            List<ElementFeatures> visibleElements,
+            Set<Long> visitedXObjIds) {
+
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/resources/log4j2-test.xml b/src/test/resources/log4j2-test.xml
new file mode 100644
index 0000000..5e206fa
--- /dev/null
+++ b/src/test/resources/log4j2-test.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+