Pull request #2: RED-4875 2
Merge in RED/pdftron-logic-commons from RED-4875_2 to master * commit '46f3b74cb560e34bf6ec08d1ed430b193125e0ad': RED-4875 - close pdfdoc in finally and add UtilityClass annotation for class PdfTextExtraction RED-4875 - added '.' in javadoc RED-4875 - remove spring boot in pom and add a javadoc-link RED-4875 - remove method and add delta to call with pdfdoc RED-4875 - first implementation of hasInvisibleText() RED-4875 - move destroy of pdftron-elements out of loop RED-4875 - add possibility to call it with pdfdoc-object RED-4875 - move PdfTextExtraction.java from ocr-service to here RED-4875 - some refactoring and adding log RED-4875 - refactored code so it can get called with both pdfdoc and in/output-streams RED-4875 - add import for docker-config RED-4875 - added Docker config again and deleted JUnit Resultparser RED-4875 - delete docker config
This commit is contained in:
commit
4875042086
@ -1,10 +1,9 @@
|
||||
package buildjob;
|
||||
|
||||
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
|
||||
|
||||
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
|
||||
import com.atlassian.bamboo.specs.api.BambooSpec;
|
||||
import com.atlassian.bamboo.specs.api.builders.BambooKey;
|
||||
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
|
||||
@ -89,10 +88,6 @@ public class PlanSpec {
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
|
||||
.argument(SERVICE_NAME),
|
||||
createJUnitParserTask()
|
||||
.description("Resultparser")
|
||||
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
|
||||
.enabled(true),
|
||||
new ScriptTask()
|
||||
.description("Sonar")
|
||||
.location(Location.FILE)
|
||||
|
||||
17
pom.xml
17
pom.xml
@ -31,23 +31,6 @@
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-configuration-processor</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.pdftron</groupId>
|
||||
<artifactId>PDFNet</artifactId>
|
||||
|
||||
@ -12,8 +12,6 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.google.common.primitives.Bytes;
|
||||
import com.google.common.primitives.Doubles;
|
||||
import com.pdftron.common.Matrix2D;
|
||||
@ -38,7 +36,6 @@ import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
public class InvisibleElementRemovalService {
|
||||
|
||||
static public final double TOLERANCE = 1e-3;
|
||||
@ -65,6 +62,34 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
PDFDoc pdfDoc = new PDFDoc(pdfFile);
|
||||
|
||||
execute(pdfDoc, delta);
|
||||
|
||||
try {
|
||||
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
} catch (Exception e) {
|
||||
log.error("File could not be saved after invisible element removal");
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
pdfDoc.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method is similar to {@link #removeInvisibleElements(InputStream, OutputStream, boolean)}, just with a PDFDoc.
|
||||
*/
|
||||
@SneakyThrows
|
||||
public void removeInvisibleElements(PDFDoc pdfDoc, boolean delta) {
|
||||
|
||||
execute(pdfDoc, delta);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void execute(PDFDoc pdfDoc, boolean delta) {
|
||||
|
||||
log.info("Start removing invisible Elements");
|
||||
ElementWriter writer = new ElementWriter();
|
||||
ElementReader reader = new ElementReader();
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
@ -75,7 +100,6 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
visitedXObjIds.add(page.getSDFObj().getObjNum());
|
||||
|
||||
|
||||
InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder()
|
||||
.reader(reader)
|
||||
.clippingPathStack(new ClippingPathStack(page.getMediaBox()))
|
||||
@ -90,18 +114,12 @@ public class InvisibleElementRemovalService {
|
||||
context.visitedXObjIds().clear();
|
||||
|
||||
removeOverlappedElements(page, writer, context);
|
||||
}
|
||||
|
||||
try {
|
||||
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
} catch (Exception e) {
|
||||
log.error("File could not be saved after invisible element removal");
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
writer.destroy();
|
||||
reader.destroy();
|
||||
pdfDoc.close();
|
||||
|
||||
log.info("Finished removing invisible Elements");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,44 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.PageIterator;
|
||||
import com.pdftron.pdf.TextExtractor;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class PdfTextExtraction {
|
||||
|
||||
private static String execute(PDFDoc pdfDoc) throws IOException, PDFNetException{
|
||||
TextExtractor extractor = new TextExtractor();
|
||||
List<String> texts = new ArrayList<>();
|
||||
|
||||
PageIterator iterator = pdfDoc.getPageIterator();
|
||||
while (iterator.hasNext()) {
|
||||
Page page = iterator.next();
|
||||
extractor.begin(page);
|
||||
texts.add(extractor.getAsText());
|
||||
}
|
||||
|
||||
extractor.destroy();
|
||||
pdfDoc.close();
|
||||
return String.join("\n", texts);
|
||||
}
|
||||
|
||||
public static String extractAllTextFromDocument(InputStream fileStream) throws IOException, PDFNetException {
|
||||
PDFDoc pdfDoc = new PDFDoc(fileStream);
|
||||
return execute(pdfDoc);
|
||||
}
|
||||
|
||||
public static String extractAllTextFromDocument(PDFDoc pdfDoc) throws IOException, PDFNetException {
|
||||
return execute(pdfDoc);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user