RED-9353: use azure ocr service

This commit is contained in:
Kilian Schüttler 2024-07-01 11:13:26 +02:00
parent a2f559af51
commit 66d3433e04
17 changed files with 821 additions and 44 deletions

View File

@ -148,6 +148,8 @@ public class LayoutParsingPipeline {
visualLayoutParsingResponse,
layoutParsingRequest.identifier());
log.info("Building document graph for {}", layoutParsingRequest.identifier());
Document documentGraph = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null //
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument);
@ -166,7 +168,7 @@ public class LayoutParsingPipeline {
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);
if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.CLARIFYND)) {
if (layoutParsingRequest.researchDocumentStorageId() != null) {
log.info("Building research document data for {}", layoutParsingRequest.identifier());
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);
@ -257,7 +259,7 @@ public class LayoutParsingPipeline {
OutlineObject lastProcessedOutlineObject = null;
// parsing the structure elements could be useful as well
if(layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
if (layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
classificationDocument.setOutlineObjectTree(outlineExtractorService.getOutlineObjectTree(originDocument));
}
@ -305,37 +307,23 @@ public class LayoutParsingPipeline {
TextRulingsClassifier.classifyUnderlinedAndStrikethroughText(words, cleanRulings);
List<Box> graphics = graphicExtractorService.extractPathElementGraphics(originDocument,
pdPage,
pageNumber,
cleanRulings,
stripper.getTextPositionSequences(),
List<Box> graphics = graphicExtractorService.extractPathElementGraphics(originDocument, pdPage, pageNumber, cleanRulings, stripper.getTextPositionSequences(),
false);
false);
pdfImages.computeIfAbsent(pageNumber, x -> new ArrayList<>())
.addAll(graphics.stream()
.map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()),
ImageType.GRAPHIC,
false,
stripper.getPageNumber(),
""))
.map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), ImageType.GRAPHIC, false, stripper.getPageNumber(), ""))
.toList());
ClassificationPage classificationPage = switch (layoutParsingType) {
case REDACT_MANAGER_OLD ->
redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings, classificationDocument.getVisualizations());
case DOCUMINE_OLD -> docuMineBlockificationService.blockify(words, cleanRulings);
case DOCUMINE, REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH -> docstrumBlockificationService.blockify(words,
cleanRulings,
true,
classificationDocument.getVisualizations(),
layoutParsingType);
case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(words,
cleanRulings,
false,
classificationDocument.getVisualizations(),
layoutParsingType);
case DOCUMINE, REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH ->
docstrumBlockificationService.blockify(words, cleanRulings, true, classificationDocument.getVisualizations(), layoutParsingType);
case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG ->
docstrumBlockificationService.blockify(words, cleanRulings, false, classificationDocument.getVisualizations(), layoutParsingType);
};
classificationPage.setCleanRulings(cleanRulings);
@ -345,7 +333,7 @@ public class LayoutParsingPipeline {
classificationPage.setPageWidth(cropbox.getWidth());
classificationPage.setPageHeight(cropbox.getHeight());
if(layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
if (layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
List<OutlineObject> outlineObjects = classificationDocument.getOutlineObjectTree().getOutlineObjectsPerPage().getOrDefault(pageNumber - 1, new ArrayList<>());
OutlineObject notFoundOutlineObject = null;
@ -394,8 +382,8 @@ public class LayoutParsingPipeline {
}
log.info("Classify TextBlocks for {}", identifier);
switch (layoutParsingType) {
case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_OLD, CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH -> redactManagerClassificationService.classifyDocument(
classificationDocument);
case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_OLD, CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH ->
redactManagerClassificationService.classifyDocument(classificationDocument);
case DOCUMINE_OLD, DOCUMINE -> docuMineClassificationService.classifyDocument(classificationDocument);
case CLARIFYND -> clarifyndClassificationService.classifyDocument(classificationDocument);
}

View File

@ -5,6 +5,9 @@ import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import com.google.common.base.Strings;
import com.knecon.fforesight.service.viewerdoc.service.IViewerDocumentService;
import com.knecon.fforesight.service.viewerdoc.service.pdftron.PDFTronViewerDocumentService;
import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;
import io.micrometer.observation.ObservationRegistry;
@ -13,12 +16,16 @@ import io.micrometer.observation.ObservationRegistry;
@ComponentScan
public class LayoutParsingServiceProcessorConfiguration {
@Bean
@Autowired
public ViewerDocumentService viewerDocumentService(ObservationRegistry registry) {
public IViewerDocumentService viewerDocumentService(ObservationRegistry registry, LayoutparserSettings settings) {
if (!Strings.isNullOrEmpty(settings.getPdftronLicense())) {
return new PDFTronViewerDocumentService(registry, settings.getPdftronLicense());
} else {
return new ViewerDocumentService(registry);
}
return new ViewerDocumentService(registry);
}
}

View File

@ -17,4 +17,5 @@ public class LayoutparserSettings {
boolean debug;
LayoutParsingType layoutParsingTypeOverride;
String pdftronLicense;
}

View File

@ -35,7 +35,7 @@ import com.knecon.fforesight.service.viewerdoc.model.LayoutGrid;
import com.knecon.fforesight.service.viewerdoc.model.PlacedText;
import com.knecon.fforesight.service.viewerdoc.model.Standard14EmbeddableFont;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;
import com.knecon.fforesight.service.viewerdoc.service.IViewerDocumentService;
import io.micrometer.observation.annotation.Observed;
import lombok.AccessLevel;
@ -48,7 +48,7 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
public class LayoutGridService {
ViewerDocumentService viewerDocumentService;
IViewerDocumentService viewerDocumentService;
static float FONT_SIZE = 10f;
static float LINE_WIDTH = 1f;

View File

@ -190,7 +190,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
imageMetadata.getGeometry().getHeight()),
ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
imageMetadata.isAlpha(),
imageMetadata.getPosition().getPageNumber(), "")));
imageMetadata.getPosition().getPageNumber(),
"")));
System.out.println("object");
}

View File

@ -12,6 +12,7 @@ dependencies {
implementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
implementation("org.slf4j:slf4j-api:1.7.25")
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
implementation("com.pdftron:PDFNet:10.5.0")
testImplementation("org.apache.logging.log4j:log4j-slf4j-impl:2.22.1")
testImplementation("org.junit.jupiter:junit-jupiter")

View File

@ -3,8 +3,13 @@ package com.knecon.fforesight.service.viewerdoc.model;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDFont;
import com.pdftron.pdf.Font;
import com.pdftron.pdf.PDFDoc;
/**
 * A font that can be made available to a PDF document, for either of the two PDF libraries
 * referenced here (PDFBox {@link PDDocument} and PDFTron {@link PDFDoc}).
 */
public interface EmbeddableFont {
// Returns the PDFBox font object to use with the given PDFBox document.
PDFont embed(PDDocument document);
// Returns the PDFTron font object to use with the given PDFTron document.
Font embed(PDFDoc doc);
}

View File

@ -1,10 +1,15 @@
package com.knecon.fforesight.service.viewerdoc.model;
import java.util.Objects;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import com.pdftron.pdf.Font;
import com.pdftron.pdf.PDFDoc;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@ -13,10 +18,11 @@ public class Standard14EmbeddableFont implements EmbeddableFont {
private final PDType1Font font;
private final int pdfTronIdentifier;
public static Standard14EmbeddableFont helvetica() {
return new Standard14EmbeddableFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA));
return new Standard14EmbeddableFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), Font.e_helvetica);
}
@ -34,4 +40,12 @@ public class Standard14EmbeddableFont implements EmbeddableFont {
return font;
}
@Override
@SneakyThrows
public Font embed(PDFDoc document) {
return Font.create(document, pdfTronIdentifier);
}
}

View File

@ -23,4 +23,10 @@ public class VisualizationsOnPage {
@Builder.Default
List<FilledRectangle> filledRectangles = new LinkedList<>();
/**
 * Tells whether there is nothing to draw on this page.
 *
 * @return {@code true} only if the placed texts, colored lines, colored rectangles and
 *         filled rectangles are all empty
 */
public boolean isEmpty() {
    if (!placedTexts.isEmpty()) {
        return false;
    }
    if (!coloredLines.isEmpty()) {
        return false;
    }
    if (!coloredRectangles.isEmpty()) {
        return false;
    }
    return filledRectangles.isEmpty();
}
}

View File

@ -0,0 +1,27 @@
package com.knecon.fforesight.service.viewerdoc.service;
import java.io.File;
import java.util.List;
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import io.micrometer.observation.ObservationRegistry;
/**
 * Common contract of the services that draw {@link Visualizations} into a PDF file.
 */
public interface IViewerDocumentService {

    /**
     * Reads the PDF from {@code originFile}, adds the given visualizations and writes the result
     * to {@code destinationFile}.
     *
     * @param originFile      the PDF to read
     * @param destinationFile the file the resulting PDF is written to
     * @param visualizations  the visualizations to add
     */
    void addVisualizationsOnPage(File originFile, File destinationFile, List<Visualizations> visualizations);


    /**
     * Attaches the page count and the names of the drawn layers as high-cardinality key values
     * to the current observation. Does nothing if the registry is {@code null}, is a no-op
     * registry, or has no current observation.
     *
     * @param registry      the observation registry, may be {@code null}
     * @param numberOfPages the page count to report under {@code numberOfPages}
     * @param layers        the layers to report under {@code layer_0}, {@code layer_1}, ...
     */
    default void enrichObservation(ObservationRegistry registry, int numberOfPages, List<ContentStreams.Identifier> layers) {

        boolean observable = registry != null && registry.getCurrentObservation() != null && !registry.isNoop();
        if (!observable) {
            return;
        }

        var observation = registry.getCurrentObservation();
        observation.highCardinalityKeyValue("numberOfPages", String.valueOf(numberOfPages));

        int layerIndex = 0;
        for (ContentStreams.Identifier layer : layers) {
            observation.highCardinalityKeyValue("layer_" + layerIndex, String.valueOf(layer.name()));
            layerIndex++;
        }
    }

}

View File

@ -48,7 +48,7 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@RequiredArgsConstructor
public class ViewerDocumentService {
public class ViewerDocumentService implements IViewerDocumentService {
private final ObservationRegistry registry;
@ -64,7 +64,8 @@ public class ViewerDocumentService {
PDDocument pdDocument = openPDDocument(tmpFile.toFile());
enrichObservation(pdDocument,
enrichObservation(registry,
pdDocument.getNumberOfPages(),
visualizations.stream()
.map(Visualizations::getLayer)
.toList());
@ -106,7 +107,11 @@ public class ViewerDocumentService {
contentStream.saveGraphicsState();
drawVisualizationsToContentStream(pdDocument, visualization.getVisualizationsOnPages().get(pageNumber), contentStream, textDeRotationMatrix);
drawVisualizationsToContentStream(pdDocument,
visualization.getVisualizationsOnPages()
.get(pageNumber),
contentStream,
textDeRotationMatrix);
contentStream.restoreGraphicsState();
@ -185,8 +190,10 @@ public class ViewerDocumentService {
contentStream.setFont(font, placedText.fontSize());
contentStream.beginText();
contentStream.setNonStrokingColor(placedText.color());
if (placedText.renderingMode().isPresent()) {
contentStream.setRenderingMode(placedText.renderingMode().get());
if (placedText.renderingMode()
.isPresent()) {
contentStream.setRenderingMode(placedText.renderingMode()
.get());
} else {
contentStream.setRenderingMode(RenderingMode.FILL);
}
@ -198,12 +205,12 @@ public class ViewerDocumentService {
}
private void enrichObservation(PDDocument pdDocument, List<ContentStreams.Identifier> layers) {
private void enrichObservation(int numberOfPages, List<ContentStreams.Identifier> layers) {
if (registry == null || registry.getCurrentObservation() == null || registry.isNoop()) {
return;
}
registry.getCurrentObservation().highCardinalityKeyValue("numberOfPages", String.valueOf(pdDocument.getNumberOfPages()));
registry.getCurrentObservation().highCardinalityKeyValue("numberOfPages", String.valueOf(numberOfPages));
for (int i = 0; i < layers.size(); i++) {
ContentStreams.Identifier layer = layers.get(i);
@ -234,7 +241,8 @@ public class ViewerDocumentService {
(float) placedText.lineStart().getX(),
(float) placedText.lineStart().getY());
} else {
textMatrix = placedText.textMatrix().get();
textMatrix = placedText.textMatrix()
.get();
}
return textMatrix;
}

View File

@ -0,0 +1,73 @@
package com.knecon.fforesight.service.viewerdoc.service.pdftron;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
/**
 * Keeps track of the marked-content sections that are currently open while the elements of a
 * content stream are visited in order. The innermost open section is on top of the stack.
 */
public class MarkedContentStack {

    private final Deque<MarkedContent> stack = new LinkedList<>();


    /**
     * Opens a new marked-content section nested inside the currently open ones.
     *
     * @param name the tag name of the section
     */
    public void enterMarkedContent(String name) {

        stack.push(new MarkedContent(name));
    }


    /**
     * Closes the innermost open marked-content section. Does nothing when no section is open,
     * so an unbalanced end marker in a malformed content stream does not abort processing.
     */
    public void leaveMarkedContent() {

        // pollFirst() removes the same element as pop() but returns null instead of throwing
        // NoSuchElementException when the stack is empty.
        stack.pollFirst();
    }


    /**
     * @return the name of the innermost open section, or the empty string if none is open
     */
    public String currentMarkedContent() {

        MarkedContent innermost = stack.peek();
        return innermost == null ? "" : innermost.name();
    }


    /**
     * @param name the tag name to look for
     * @return {@code true} if any currently open section (at any nesting depth) has that name
     */
    public boolean currentMarkedContentContains(String name) {

        return stack.stream()
                .anyMatch(markedContent -> markedContent.name().equals(name));
    }


    /**
     * @param names the tag names to look for
     * @return {@code true} if any currently open section (at any nesting depth) has one of the
     *         given names; {@code false} if no section is open
     */
    public boolean currentMarkedContentContainsAny(Set<String> names) {

        return stack.stream()
                .anyMatch(markedContent -> names.contains(markedContent.name()));
    }


    /**
     * Forgets all open sections.
     */
    public void clear() {

        stack.clear();
    }


    private record MarkedContent(String name) {

    }

}

View File

@ -0,0 +1,153 @@
package com.knecon.fforesight.service.viewerdoc.service.pdftron;
import java.io.File;
import java.io.FileInputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.pdfbox.cos.COSName;
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
import com.knecon.fforesight.service.viewerdoc.model.EmbeddableFont;
import com.knecon.fforesight.service.viewerdoc.model.PlacedText;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage;
import com.knecon.fforesight.service.viewerdoc.service.IViewerDocumentService;
import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.Font;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.PDFNet;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator;
import com.pdftron.pdf.ocg.Group;
import com.pdftron.sdf.SDFDoc;
import io.micrometer.observation.ObservationRegistry;
import io.micrometer.observation.annotation.Observed;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
/**
 * {@link IViewerDocumentService} implementation that draws the visualizations with the PDFTron SDK.
 */
@Slf4j
@RequiredArgsConstructor
public class PDFTronViewerDocumentService implements IViewerDocumentService {

    private final ObservationRegistry registry;
    private final String pdftronLicense;


    /**
     * Loads {@code originFile}, removes the marked content that is about to be redrawn from every
     * page, draws the given visualizations and saves the result to {@code destinationFile}.
     * <p>
     * PDFNet is initialized at the start of each call and terminated when the call ends, also
     * when processing fails. The temporary working copy is deleted in both cases.
     *
     * @param originFile      the PDF to read
     * @param destinationFile the file the resulting PDF is saved to; may be the same as {@code originFile}
     * @param visualizations  the visualizations to draw
     */
    @Override
    @Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
    @SneakyThrows
    public synchronized void addVisualizationsOnPage(File originFile, File destinationFile, List<Visualizations> visualizations) {

        PDFNet.initialize(pdftronLicense);

        Path tmpFile = null;
        try {
            // originFile and destinationFile might be the same, so we use a temp file.
            // Otherwise, saving the document might corrupt the file
            tmpFile = Files.createTempFile("tmpViewerDocument", ".pdf");
            Files.copy(originFile.toPath(), tmpFile, StandardCopyOption.REPLACE_EXISTING);

            try (PDFDoc pdfDoc = loadPdfDoc(tmpFile);
                    ElementWriter pageWriter = new ElementWriter();
                    ElementReader reader = new ElementReader();
                    ElementBuilder builder = new ElementBuilder()) {

                enrichObservation(registry,
                                  pdfDoc.getPageCount(),
                                  visualizations.stream()
                                          .map(Visualizations::getLayer)
                                          .toList());

                Map<ContentStreams.Identifier, Group> groupMap = PdftronLayerUtility.addLayersToDocument(visualizations, pdfDoc);

                Map<EmbeddableFont, Font> fontMap = buildFontMap(visualizations, pdfDoc);

                Set<String> markedContentToDraw = extractMarkedContentNames(visualizations.stream()
                                                                                    .map(Visualizations::getLayer));
                Set<String> kneconMarkedContents = extractMarkedContentNames(ContentStreams.allContentStreams.stream());

                PageContentCleaner pageContentCleaner = PageContentCleaner.builder()
                        .writer(pageWriter)
                        .reader(reader)
                        .elementBuilder(builder)
                        .markedContentToDraw(markedContentToDraw)
                        .kneconMarkedContents(kneconMarkedContents)
                        .build();

                VisualizationWriter visualizationWriter = VisualizationWriter.builder()
                        .writer(pageWriter)
                        .builder(builder)
                        .groupMap(groupMap)
                        .visualizations(visualizations)
                        .fontMap(fontMap)
                        .build();

                int pageNumber = 0;
                for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); pageNumber++) {

                    Page page = iterator.next();

                    pageContentCleaner.cleanPage(page);

                    visualizationWriter.drawVisualizationsOnPage(pageNumber, page);
                }

                saveDocument(pdfDoc, destinationFile);
            }
        } finally {
            try {
                deleteQuietly(tmpFile);
            } finally {
                // Always release the SDK, even if processing or cleanup failed.
                PDFNet.terminate();
            }
        }
    }


    /**
     * Deletes the temporary working copy. A failure is only logged so that it cannot hide an
     * exception thrown by the actual processing.
     */
    private static void deleteQuietly(Path tmpFile) {

        if (tmpFile == null) {
            return;
        }
        try {
            Files.deleteIfExists(tmpFile);
        } catch (java.io.IOException e) {
            log.warn("Could not delete temporary file {}", tmpFile, e);
        }
    }


    /**
     * Collects the COS names of the given content stream identifiers.
     */
    private static Set<String> extractMarkedContentNames(Stream<ContentStreams.Identifier> visualizations) {

        return visualizations.map(ContentStreams.Identifier::cosName)
                .map(COSName::getName)
                .collect(Collectors.toSet());
    }


    /**
     * Creates one PDFTron font per distinct font used by any placed text of the visualizations.
     */
    private static Map<EmbeddableFont, Font> buildFontMap(List<Visualizations> visualizations, PDFDoc pdfDoc) {

        return visualizations.stream()
                .map(Visualizations::getVisualizationsOnPages)
                .map(Map::values)
                .flatMap(Collection::stream)
                .map(VisualizationsOnPage::getPlacedTexts)
                .flatMap(Collection::stream)
                .map(PlacedText::font)
                .distinct()
                .collect(Collectors.toMap(Function.identity(), font -> font.embed(pdfDoc)));
    }


    /**
     * Saves the document to the target file with {@link SDFDoc.SaveMode#REMOVE_UNUSED}.
     */
    @SneakyThrows
    private void saveDocument(PDFDoc doc, File target) {

        doc.save(target.toString(), SDFDoc.SaveMode.REMOVE_UNUSED, null);
    }


    /**
     * Opens the PDF from a stream over the given file; the stream is closed before returning.
     */
    @SneakyThrows
    private static PDFDoc loadPdfDoc(Path tmpFile) {

        try (var in = new FileInputStream(tmpFile.toFile())) {
            return new PDFDoc(in);
        }
    }

}

View File

@ -0,0 +1,120 @@
package com.knecon.fforesight.service.viewerdoc.service.pdftron;
import java.util.Set;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.Page;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
/**
 * Rewrites the content of a page: everything in front of the first knecon marked-content section
 * is copied (wrapped in a graphics-state group if it is not already), and from there on every
 * element is copied except those inside marked content that is about to be redrawn.
 */
@Builder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class PageContentCleaner {

    ElementWriter writer;
    ElementReader reader;
    ElementBuilder elementBuilder;
    Set<String> markedContentToDraw;
    Set<String> kneconMarkedContents;

    @Builder.Default
    MarkedContentStack markedContentStack = new MarkedContentStack();


    /**
     * Replaces the content of the given page with a cleaned copy of itself.
     *
     * @param page the page to clean; a page without any content element is left empty
     */
    @SneakyThrows
    public void cleanPage(Page page) {

        // The stack is shared between pages; unbalanced markers on one page must not leak into the next.
        markedContentStack.clear();

        begin(page);

        Element first = reader.next();
        if (first == null) {
            // Page without content: nothing to copy, and first.getType() would throw.
            end();
            return;
        }

        boolean escaped = first.getType() == Element.e_group_begin;

        if (!escaped) {
            writer.writeElement(elementBuilder.createGroupBegin());
        }

        copyElementsUntilFirstKneconMarkedContent();

        if (!escaped) {
            writer.writeElement(elementBuilder.createGroupEnd());
        }

        copyElementsExceptMarkedContentToDraw();

        end();
    }


    @SneakyThrows
    private void begin(Page page) {

        writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
        reader.begin(page);
    }


    @SneakyThrows
    private void end() {

        writer.end();
        reader.end();
    }


    /**
     * Copies elements starting at the reader's current element and stops in front of the first
     * marked-content section whose name is one of {@code kneconMarkedContents}. That begin element
     * stays the reader's current element and is not recorded on the stack, so that
     * {@link #copyElementsExceptMarkedContentToDraw()} can process it from scratch.
     */
    @SneakyThrows
    private void copyElementsUntilFirstKneconMarkedContent() {

        for (Element element = reader.current(); element != null; element = reader.next()) {
            switch (element.getType()) {
                case Element.e_marked_content_begin -> {
                    markedContentStack.enterMarkedContent(element.getMCTag().getName());
                    if (markedContentStack.currentMarkedContentContainsAny(kneconMarkedContents)) {
                        // A plain 'break' would only leave the switch and keep copying;
                        // undo the push and leave the loop instead.
                        markedContentStack.leaveMarkedContent();
                        return;
                    }
                    writer.writeElement(element);
                }
                case Element.e_marked_content_end -> {
                    markedContentStack.leaveMarkedContent();
                    writer.writeElement(element);
                }
                default -> writer.writeElement(element);
            }
        }
    }


    /**
     * Copies the remaining elements, starting at the reader's current element, and drops every
     * element (including the begin and end markers) that lies inside a marked-content section
     * whose name is one of {@code markedContentToDraw}.
     */
    @SneakyThrows
    private void copyElementsExceptMarkedContentToDraw() {

        for (Element element = reader.current(); element != null; element = reader.next()) {
            switch (element.getType()) {
                case Element.e_marked_content_begin -> {
                    markedContentStack.enterMarkedContent(element.getMCTag().getName());
                    if (!markedContentStack.currentMarkedContentContainsAny(markedContentToDraw)) {
                        writer.writeElement(element);
                    }
                }
                case Element.e_marked_content_end -> {
                    // Decide before popping, so the end marker shares the fate of its begin marker.
                    if (!markedContentStack.currentMarkedContentContainsAny(markedContentToDraw)) {
                        writer.writeElement(element);
                    }
                    markedContentStack.leaveMarkedContent();
                }
                default -> {
                    if (!markedContentStack.currentMarkedContentContainsAny(markedContentToDraw)) {
                        writer.writeElement(element);
                    }
                }
            }
        }
    }

}

View File

@ -0,0 +1,96 @@
package com.knecon.fforesight.service.viewerdoc.service.pdftron;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.ocg.Config;
import com.pdftron.pdf.ocg.Group;
import com.pdftron.sdf.Obj;
import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
/**
 * Helpers for creating and looking up optional content groups (layers) in a PDFTron document.
 */
@UtilityClass
public class PdftronLayerUtility {

    /**
     * Makes sure every optional-content layer of the given visualizations exists in the document.
     *
     * @param visualizations the visualizations whose layers are needed
     * @param pdfDoc         the document to add the layers to
     * @return the group of each layer that is optional content; other layers have no entry
     */
    public Map<ContentStreams.Identifier, Group> addLayersToDocument(List<Visualizations> visualizations, PDFDoc pdfDoc) {

        Map<ContentStreams.Identifier, Group> groupsByLayer = new HashMap<>();
        visualizations.forEach(visualization -> {
            Optional<Group> group = addLayerToDocument(visualization.getLayer(), pdfDoc, visualization.isLayerVisibilityDefaultValue());
            if (group.isPresent()) {
                groupsByLayer.put(visualization.getLayer(), group.get());
            }
        });
        return groupsByLayer;
    }


    /**
     * Returns the group for the layer, or an empty Optional if the layer is not optional content.
     */
    private Optional<Group> addLayerToDocument(ContentStreams.Identifier layer, PDFDoc pdfDoc, boolean layerVisibilityDefaultValue) {

        if (!layer.optionalContent()) {
            return Optional.empty();
        }
        return Optional.of(addLayerToDocument(pdfDoc, layer.name(), layerVisibilityDefaultValue));
    }


    /**
     * Returns the group already named {@code layerName} in the document, or creates a new one.
     */
    @SneakyThrows
    private Group addLayerToDocument(PDFDoc doc, String layerName, boolean layerVisibilityDefaultValue) {

        Optional<Group> alreadyPresent = findGroupInDoc(doc, layerName);
        return alreadyPresent.isPresent() ? alreadyPresent.get() : addNewLayer(doc, layerName, layerVisibilityDefaultValue);
    }


    /**
     * Creates a new group, sets its initial visibility and appends it to the order array of the
     * document's optional content configuration. Configuration and order array are created when
     * they are missing.
     */
    private Group addNewLayer(PDFDoc doc, String layerName, boolean layerVisibilityDefaultValue) throws PDFNetException {

        Config config = doc.getOCGConfig();
        if (config == null) {
            config = Config.create(doc, true);
            config.setName("Default");
        }

        Group group = Group.create(doc, layerName);
        group.setInitialState(config, layerVisibilityDefaultValue);

        // The order array lists the layers that are shown in the layer panel of a PDF viewer.
        Obj order = config.getOrder();
        if (order == null) {
            order = doc.createIndirectArray();
            config.setOrder(order);
        }
        order.pushBack(group.getSDFObj());

        return group;
    }


    /**
     * Looks for the first optional content group of the document whose name equals {@code layerName}.
     */
    @SneakyThrows
    private Optional<Group> findGroupInDoc(PDFDoc doc, String layerName) {

        Obj groups = doc.getOCGs();
        if (groups == null) {
            return Optional.empty();
        }

        int count = (int) groups.size();
        for (int index = 0; index < count; index++) {
            Group candidate = new Group(groups.getAt(index));
            if (candidate.getName().equals(layerName)) {
                return Optional.of(candidate);
            }
        }
        return Optional.empty();
    }

}

View File

@ -0,0 +1,249 @@
package com.knecon.fforesight.service.viewerdoc.service.pdftron;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.awt.geom.Rectangle2D;
import java.util.List;
import java.util.Map;
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle;
import com.knecon.fforesight.service.viewerdoc.model.EmbeddableFont;
import com.knecon.fforesight.service.viewerdoc.model.FilledRectangle;
import com.knecon.fforesight.service.viewerdoc.model.PlacedText;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage;
import com.pdftron.common.Matrix2D;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.ColorPt;
import com.pdftron.pdf.ColorSpace;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.Font;
import com.pdftron.pdf.GState;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.ocg.Group;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
/**
 * Draws the {@link Visualizations} of a page on top of the existing page content. Each layer is
 * wrapped in a marked-content section named after the layer and, for optional-content layers,
 * additionally in an {@code OC} section that refers to the layer's group.
 */
@Builder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class VisualizationWriter {

    ElementWriter writer;
    ElementBuilder builder;
    List<Visualizations> visualizations;
    Map<ContentStreams.Identifier, Group> groupMap;
    Map<EmbeddableFont, Font> fontMap;


    /**
     * Writes all visualizations registered for {@code pageNumber} as an overlay onto the page.
     * Layers that have no or only empty visualizations for this page are skipped.
     *
     * @param pageNumber the key under which the page's visualizations are looked up
     * @param page       the page to draw on
     */
    @SneakyThrows
    public void drawVisualizationsOnPage(int pageNumber, Page page) {

        begin(page);

        AffineTransform textDeRotationMatrix = getTextDeRotationTransform(page);

        for (Visualizations visualization : visualizations) {

            VisualizationsOnPage visualizationsOnPage = visualization.getVisualizationsOnPages()
                    .get(pageNumber);

            if (visualizationsOnPage == null || visualizationsOnPage.isEmpty()) {
                continue;
            }

            Element markedContentStart = builder.createMarkedContentBeginInlineProperties(visualization.getLayer().cosName().getName());
            writer.writeElement(markedContentStart);

            if (visualization.getLayer().optionalContent()) {
                Element ocgStart = builder.createMarkedContentBegin("OC", groupMap.get(visualization.getLayer()).getSDFObj());
                writer.writeElement(ocgStart);
            }

            writeVisualization(visualizationsOnPage, textDeRotationMatrix);

            if (visualization.getLayer().optionalContent()) {
                Element ocgEnd = builder.createMarkedContentEnd();
                writer.writeElement(ocgEnd);
            }

            Element markedContentEnd = builder.createMarkedContentEnd();
            writer.writeElement(markedContentEnd);

        }
        end();
    }


    private void end() throws PDFNetException {

        writer.end();
    }


    private void begin(Page page) throws PDFNetException {

        writer.begin(page, ElementWriter.e_overlay, false, true, page.getResourceDict());
    }


    /**
     * Writes lines, rectangles, filled rectangles and texts of one layer, in that order.
     */
    @SneakyThrows
    private void writeVisualization(VisualizationsOnPage visualizationsOnPage, AffineTransform textDeRotationMatrix) {

        if (visualizationsOnPage.isMakePathsInvisible()) {
            // An empty clipping rectangle in front of the paths.
            Element rect = builder.createRect(0, 0, 0, 0);
            rect.setPathClip(true);
            writer.writeElement(rect);
        }

        for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) {
            drawColoredLine(coloredLine);
        }

        for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) {
            drawColoredRectangle(coloredRectangle);
        }

        for (FilledRectangle filledRectangle : visualizationsOnPage.getFilledRectangles()) {
            drawFilledRectangle(filledRectangle);
        }

        for (PlacedText placedText : visualizationsOnPage.getPlacedTexts()) {
            writePlacedText(textDeRotationMatrix, placedText);
        }
    }


    /**
     * Writes one text run with the text's font, size, fill color, text rendering mode and matrix.
     * Without an explicit rendering mode the text is filled.
     */
    private void writePlacedText(AffineTransform textDeRotationMatrix, PlacedText placedText) throws PDFNetException {

        float[] rgbComponents = placedText.color().getRGBColorComponents(null);

        Font font = fontMap.get(placedText.font());
        Element text = builder.createTextRun(placedText.text(), font, placedText.fontSize());
        GState gState = text.getGState();

        // The color is given as RGB, so the fill color space has to be RGB as well;
        // it is set regardless of the rendering mode, like in the rectangle methods.
        gState.setFillColorSpace(ColorSpace.createDeviceRGB());
        try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
            gState.setFillColor(color);
        }

        // The text rendering mode (fill, stroke, invisible, ...) is a different graphics-state
        // parameter than the color rendering intent, which must stay untouched here.
        int textRenderMode = placedText.renderingMode().isPresent()
                ? placedText.renderingMode().get().intValue()
                : GState.e_fill_text;
        gState.setTextRenderMode(textRenderMode);

        try (Matrix2D textMatrix = getTextMatrix(placedText, textDeRotationMatrix)) {
            text.setTextMatrix(textMatrix);
        }

        writer.writeElement(text);
    }


    /**
     * Writes a filled, unstroked rectangle with the rectangle's RGB color and opacity.
     */
    private void drawFilledRectangle(FilledRectangle filledRectangle) throws PDFNetException {

        float[] rgbComponents = filledRectangle.color().getRGBColorComponents(null);
        Rectangle2D r = filledRectangle.rectangle2D();

        Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());

        rect.setPathFill(true);
        rect.setPathStroke(false);
        rect.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
        rect.getGState().setFillOpacity(filledRectangle.alpha());
        try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
            rect.getGState().setFillColor(color);
        }

        writer.writeElement(rect);
    }


    /**
     * Writes a stroked, unfilled rectangle with the rectangle's RGB color and line width.
     */
    private void drawColoredRectangle(ColoredRectangle coloredRectangle) throws PDFNetException {

        float[] rgbComponents = coloredRectangle.color().getRGBColorComponents(null);
        Rectangle2D r = coloredRectangle.rectangle2D();

        Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());

        rect.setPathStroke(true);
        rect.setPathFill(false);
        rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
        rect.getGState().setLineWidth(coloredRectangle.lineWidth());
        try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
            rect.getGState().setStrokeColor(color);
        }

        writer.writeElement(rect);
    }


    /**
     * Writes a stroked straight line with the line's RGB color and line width.
     */
    private void drawColoredLine(ColoredLine coloredLine) throws PDFNetException {

        float[] rgbComponents = coloredLine.color().getRGBColorComponents(null);
        Line2D l = coloredLine.line();

        builder.pathBegin();
        builder.moveTo(l.getX1(), l.getY1());
        builder.lineTo(l.getX2(), l.getY2());
        Element line = builder.pathEnd();

        line.setPathStroke(true);
        line.setPathFill(false);
        line.getGState().setLineWidth(coloredLine.lineWidth());
        line.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
        try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
            line.getGState().setStrokeColor(color);
        }

        writer.writeElement(line);
    }


    /**
     * Builds the text matrix: the text's own matrix if it has one, otherwise the de-rotation
     * transform translated to the start of the text line.
     */
    @SneakyThrows
    private static Matrix2D getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix) {

        Matrix2D textMatrix;
        if (placedText.textMatrix().isEmpty()) {
            textMatrix = new Matrix2D(textDeRotationMatrix.getScaleX(),
                                      textDeRotationMatrix.getShearX(),
                                      textDeRotationMatrix.getShearY(),
                                      textDeRotationMatrix.getScaleY(),
                                      placedText.lineStart().getX(),
                                      placedText.lineStart().getY());
        } else {
            var matrix = placedText.textMatrix()
                    .get();
            textMatrix = new Matrix2D(matrix.getScaleX(), matrix.getShearX(), matrix.getShearY(), matrix.getScaleY(), matrix.getTranslateX(), matrix.getTranslateY());
        }
        return textMatrix;
    }


    /**
     * Maps the value of {@code page.getRotation()} to a quadrant rotation: 90 gives 3 quadrants,
     * 180 gives 2, 270 gives 1 and any other value gives 0.
     */
    @SneakyThrows
    private static AffineTransform getTextDeRotationTransform(Page page) {

        return AffineTransform.getQuadrantRotateInstance(switch (page.getRotation()) {
            case 90 -> 3;
            case 180 -> 2;
            case 270 -> 1;
            default -> 0;
        });
    }

}

View File

@ -1,5 +1,7 @@
#!/bin/bash
dir=${PWD##*/}
set -e
gradle assemble
# Get the current Git branch
@ -11,5 +13,31 @@ commit_hash=$(git rev-parse --short=5 HEAD)
# Combine branch and commit hash
buildName="${USER}-${branch}-${commit_hash}"
gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName --no-build-cache
echo "nexus.knecon.com:5001/ff/layoutparser-service-server:$buildName"
gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=${buildName}
newImageName="nexus.knecon.com:5001/ff/layoutparser-service-server:${buildName}"
echo "full image name:"
echo ${newImageName}
echo ""
# Without a namespace argument the script only builds and publishes the image.
if [ -z "$1" ]; then
exit 0
fi
namespace="${1}"
deployment_name="layoutparser-service"
echo "deploying to ${namespace}"
# Image(s) currently configured on the deployment; all expansions are quoted so that
# unusual characters in the arguments cannot be split or glob-expanded by the shell.
oldImageName=$(rancher kubectl -n "${namespace}" get deployment "${deployment_name}" -o=jsonpath='{.spec.template.spec.containers[*].image}')
if [ "${newImageName}" = "${oldImageName}" ]; then
echo "Image tag did not change, redeploying..."
rancher kubectl rollout restart deployment "${deployment_name}" -n "${namespace}"
else
echo "upgrading the image tag..."
rancher kubectl set image "deployment/${deployment_name}" "${deployment_name}=${newImageName}" -n "${namespace}"
fi
# Wait until the rollout has finished before reporting success.
rancher kubectl rollout status deployment "${deployment_name}" -n "${namespace}"
echo "Built ${deployment_name}:${buildName} and deployed to ${namespace}"