Pull request #2: RED-4875 2

Merge in RED/pdftron-logic-commons from RED-4875_2 to master

* commit '46f3b74cb560e34bf6ec08d1ed430b193125e0ad':
  RED-4875 - close pdfdoc in finally and add UtilityClass annotation for class PdfTextExtraction
  RED-4875 - added '.' in javadoc
  RED-4875 - remove spring boot in pom and add a javadoc-link
  RED-4875 - remove method and add delta to call with pdfdoc
  RED-4875 - first implementation of hasInvisibleText()
  RED-4875 - move destroy of pdftron-elements out of loop
  RED-4875 - add possibility to call it with pdfdoc-object
  RED-4875 - move PdfTextExtraction.java from ocr-service to here
  RED-4875 - some refactoring and adding log
  RED-4875 - refactored code so it can get called with both, pdfdoc and in/output-streams
  RED-4875 - add import for docker-config
  RED-4875 - added Docker config again and deleted JUnit Resultparser
  RED-4875 - delete docker config
This commit is contained in:
Thomas Beyer 2023-03-21 14:26:22 +01:00
commit 4875042086
4 changed files with 75 additions and 35 deletions

View File

@ -1,10 +1,9 @@
package buildjob;
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
@ -89,10 +88,6 @@ public class PlanSpec {
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
.argument(SERVICE_NAME),
createJUnitParserTask()
.description("Resultparser")
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
.enabled(true),
new ScriptTask()
.description("Sonar")
.location(Location.FILE)

17
pom.xml
View File

@ -31,23 +31,6 @@
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.pdftron</groupId>
<artifactId>PDFNet</artifactId>

View File

@ -12,8 +12,6 @@ import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.springframework.stereotype.Service;
import com.google.common.primitives.Bytes;
import com.google.common.primitives.Doubles;
import com.pdftron.common.Matrix2D;
@ -38,7 +36,6 @@ import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
public class InvisibleElementRemovalService {
static public final double TOLERANCE = 1e-3;
@ -65,6 +62,34 @@ public class InvisibleElementRemovalService {
PDFDoc pdfDoc = new PDFDoc(pdfFile);
execute(pdfDoc, delta);
try {
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
} catch (Exception e) {
log.error("File could not be saved after invisible element removal");
throw new RuntimeException(e);
} finally {
pdfDoc.close();
}
}
/**
 * Removes invisible elements from an already opened {@link PDFDoc} by delegating to
 * {@code execute(pdfDoc, delta)}.
 * <p>
 * This method is similar to {@link #removeInvisibleElements(InputStream, OutputStream, boolean)}, just with a PDFDoc:
 * it does no stream handling and does not itself save the document.
 * <p>
 * NOTE(review): whether the document is still open after this call depends on {@code execute} - confirm
 * before reusing {@code pdfDoc} afterwards.
 *
 * @param pdfDoc the document to process
 * @param delta  forwarded unchanged to {@code execute}; presumably the same switch as in the stream-based
 *               overload - TODO confirm its exact meaning there
 */
@SneakyThrows
public void removeInvisibleElements(PDFDoc pdfDoc, boolean delta) {
execute(pdfDoc, delta);
}
@SneakyThrows
private void execute(PDFDoc pdfDoc, boolean delta) {
log.info("Start removing invisible Elements");
ElementWriter writer = new ElementWriter();
ElementReader reader = new ElementReader();
Set<Long> visitedXObjIds = new TreeSet<>();
@ -75,7 +100,6 @@ public class InvisibleElementRemovalService {
visitedXObjIds.add(page.getSDFObj().getObjNum());
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
.reader(reader)
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
@ -90,18 +114,12 @@ public class InvisibleElementRemovalService {
context.visitedXObjIds().clear();
removeOverlappedElements(page, writer, context);
}
try {
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
} catch (Exception e) {
log.error("File could not be saved after invisible element removal");
throw new RuntimeException(e);
}
writer.destroy();
reader.destroy();
pdfDoc.close();
log.info("Finished removing invisible Elements");
}

View File

@ -0,0 +1,44 @@
package com.iqser.red.pdftronlogic.commons;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator;
import com.pdftron.pdf.TextExtractor;
import lombok.experimental.UtilityClass;
/**
 * Static helpers that extract the text of every page of a PDF document.
 */
@UtilityClass
public class PdfTextExtraction {

    /**
     * Collects the text of each page in page-iterator order and joins the page texts with a line feed.
     * <p>
     * The text extractor is destroyed and the document is closed in {@code finally} blocks, so both
     * native resources are released even when extraction fails half-way through the document.
     *
     * @param pdfDoc the document to read; it is closed before this method returns or throws
     * @return the text of all pages, one page per entry, separated by {@code '\n'}
     * @throws IOException     declared for callers; kept from the original signature
     * @throws PDFNetException if PDFNet fails while iterating, extracting or releasing resources
     */
    private static String execute(PDFDoc pdfDoc) throws IOException, PDFNetException {
        try {
            TextExtractor extractor = new TextExtractor();
            try {
                List<String> texts = new ArrayList<>();
                PageIterator iterator = pdfDoc.getPageIterator();
                while (iterator.hasNext()) {
                    Page page = iterator.next();
                    extractor.begin(page);
                    texts.add(extractor.getAsText());
                }
                return String.join("\n", texts);
            } finally {
                // Release the extractor first, then the document (same order as on the success path before).
                extractor.destroy();
            }
        } finally {
            pdfDoc.close();
        }
    }

    /**
     * Opens a document from the given stream, extracts the text of all pages and closes the document again.
     *
     * @param fileStream stream providing the PDF content; this method does not close the stream itself
     * @return the text of all pages, separated by {@code '\n'}
     * @throws IOException     declared for callers; kept from the original signature
     * @throws PDFNetException if the document cannot be opened or the extraction fails
     */
    public static String extractAllTextFromDocument(InputStream fileStream) throws IOException, PDFNetException {
        PDFDoc pdfDoc = new PDFDoc(fileStream);
        return execute(pdfDoc);
    }

    /**
     * Extracts the text of all pages of an already opened document.
     * <p>
     * Note that the passed document is closed by this call, on success as well as on failure, and must not
     * be used afterwards.
     *
     * @param pdfDoc the document to read; closed when this method returns or throws
     * @return the text of all pages, separated by {@code '\n'}
     * @throws IOException     declared for callers; kept from the original signature
     * @throws PDFNetException if the extraction fails
     */
    public static String extractAllTextFromDocument(PDFDoc pdfDoc) throws IOException, PDFNetException {
        return execute(pdfDoc);
    }
}