From e6a1656e1859b8ae94c616e808d65345dab6f5b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Mon, 26 Aug 2024 14:59:10 +0200 Subject: [PATCH] RED-9864: Ocr not working --- .../InvisibleElementRemovalService.java | 29 ++++--- .../red/pdftronlogic/commons/PDFNetUtils.java | 49 ++++++++++++ .../commons/PdfTextExtraction.java | 2 +- .../features/ElementFeatureFactory.java | 72 ++++++++--------- .../commons/features/ElementFeatures.java | 5 ++ .../commons/features/GlyphInfo.java | 78 +++++++++++++++---- .../commons/features/TextFeatures.java | 24 ++---- .../commons/lookup/ElementFeatureLookup.java | 44 ++++++++--- .../commons/lookup/OverlapVisitor.java | 42 ---------- .../commons/GlyphExtractionTest.java | 13 ++-- .../InvisibleElementRemovalServiceTest.java | 11 ++- 11 files changed, 224 insertions(+), 145 deletions(-) create mode 100644 src/main/java/com/iqser/red/pdftronlogic/commons/PDFNetUtils.java delete mode 100644 src/main/java/com/iqser/red/pdftronlogic/commons/lookup/OverlapVisitor.java diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java index b3756fc..1ba1dcc 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java @@ -163,7 +163,7 @@ public class InvisibleElementRemovalService { visitedXObjIds.add(page.getSDFObj().getObjNum()); - InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder() + try (InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder() .reader(reader) .clippingPathStack(new ClippingPathStack(page.getMediaBox())) .markedContentStack(new MarkedContentStack(pdfDoc)) @@ -173,14 +173,15 @@ public class InvisibleElementRemovalService { .visibleElements(new ElementFeatureLookup()) .visitedXObjIds(visitedXObjIds) .markedContentToIgnore(markedContentToIgnore) - .build(); + .build()) { - removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context); + removeClippedElementsAndInvisibleTextAndRememberOverlappedElements(page, writer, context); - context.visitedXObjIds().clear(); - context.markedContentStack().clear(); + context.visitedXObjIds().clear(); + context.markedContentStack().clear(); - removeOverlappedElements(page, writer, context); + removeOverlappedElements(page, writer, context); + } } } @@ -248,10 +249,7 @@ public class InvisibleElementRemovalService { boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight()); if (inClippingPath) { ImageFeatures imageFeatures = ElementFeatureFactory.buildImage(imageElement); - if (!(context.markedContentStack.contextHasTransparency() - || imageFeatures.isTransparent() - || imageFeatures.isImageMask() - || imageFeatures.isSoftMask())) { + if (!(context.markedContentStack.contextHasTransparency() || imageFeatures.isTransparent() || imageFeatures.isImageMask() || imageFeatures.isSoftMask())) { calculateOverlaps(context, imageFeatures, imageFeatures.isMasked()); } context.visibleElements().add(imageFeatures); @@ -280,7 +278,7 @@ public class InvisibleElementRemovalService { boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context); if (inClippingPath && isTextVisible) { - context.visibleElements().add(ElementFeatureFactory.buildText(textElement, context.delta())); + context.visibleElements().add(ElementFeatureFactory.buildText(textElement, true, context.delta())); } if (!context.delta()) { if (inClippingPath && isTextVisible) { @@ -614,7 +612,14 @@ public class InvisibleElementRemovalService { ElementFeatureLookup visibleElements, Set visitedXObjIds, Set markedContentToIgnore - ) { + ) implements AutoCloseable { + + @Override + public void close() { + + overlappedElements.close(); + visibleElements.close(); + } } diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/PDFNetUtils.java b/src/main/java/com/iqser/red/pdftronlogic/commons/PDFNetUtils.java new file mode 100644 index 0000000..2ab8cdc --- /dev/null +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/PDFNetUtils.java @@ -0,0 +1,49 @@ +package com.iqser.red.pdftronlogic.commons; + +import java.lang.reflect.Field; + +import com.pdftron.pdf.Font; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class PDFNetUtils { + + @SuppressWarnings("PMD") + public void requireFontNotClosed(Font font) { + + try { + if (font.__GetHandle() == 0L) { + throw new AssertionError("Font is already closed!"); + } + Object refHandle = font.__GetRefHandle(); + + Class clazz = refHandle.getClass(); + + Field implField = null; + while (clazz != null) { + try { + + implField = clazz.getDeclaredField("impl"); + implField.setAccessible(true); + break; + } catch (NoSuchFieldException e) { + clazz = clazz.getSuperclass(); + } + } + + if (implField != null) { + long implValue = (Long) implField.get(refHandle); + + if (implValue == 0L) { + throw new AssertionError("Associated ElementReader of Font is already closed!"); + } + } + + } catch (IllegalAccessException e) { + throw new AssertionError("Font Ref is missing the field impl, should never happen!"); + } + } + + +} diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/PdfTextExtraction.java b/src/main/java/com/iqser/red/pdftronlogic/commons/PdfTextExtraction.java index 07084b3..c7e5738 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/PdfTextExtraction.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/PdfTextExtraction.java @@ -85,7 +85,7 @@ public class PdfTextExtraction { for (Element element = reader.next(); element != null; element = reader.next()) { switch (element.getType()) { - case Element.e_text -> textFeaturesOnPage.add(ElementFeatureFactory.buildText(element, includePathData)); + case Element.e_text -> textFeaturesOnPage.add(ElementFeatureFactory.buildText(element, includePathData, includePathData)); case Element.e_form -> { Obj formObj = element.getXObject(); diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatureFactory.java b/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatureFactory.java index 95d61b4..4139abe 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatureFactory.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatureFactory.java @@ -1,6 +1,5 @@ package com.iqser.red.pdftronlogic.commons.features; -import java.awt.geom.GeneralPath; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -14,7 +13,7 @@ import com.pdftron.pdf.Element; import com.pdftron.pdf.Font; import com.pdftron.pdf.GState; import com.pdftron.pdf.Image; -import com.pdftron.pdf.PathData; +import com.pdftron.sdf.Obj; import lombok.SneakyThrows; import lombok.experimental.UtilityClass; @@ -26,7 +25,7 @@ public class ElementFeatureFactory { return switch (element.getType()) { case Element.e_path -> buildPath(element); - case Element.e_text -> buildText(element); + case Element.e_text -> buildText(element, false, false); case Element.e_image, Element.e_inline_image -> buildImage(element); case Element.e_form -> buildForm(element); // This technically should never happen, it's a safetynet @@ -72,7 +71,7 @@ public class ElementFeatureFactory { boolean masked = false; if (element.getType() == Element.e_image) { Image image = new Image(element.getXObject()); - if (image.getMask() != null) { + if (image.getMask() != null && image.getMask().getType() == Obj.e_stream) { Image imageMask = new Image(image.getMask()); masked = imageMask.isImageMask(); } @@ -94,27 +93,25 @@ public class ElementFeatureFactory { } - public TextFeatures buildText(Element element) throws PDFNetException { - - return buildText(element, false); - } - - /* - Use includePathData = true, when trying to draw the glyphs, see GlyphExtractionTest + Use includeGlyphs = true and preComputePathData = true, when trying to draw the glyphs, see GlyphExtractionTest + precomputePathData = true is needed, when trying to access the PathData after the PDFDoc/ElementReader has been closed */ - public TextFeatures buildText(Element element, boolean includePathData) throws PDFNetException { + public TextFeatures buildText(Element element, boolean includeGlyphs, boolean preComputePathData) throws PDFNetException { try (var bbox = element.getBBox()) { - return TextFeatures.builder() + TextFeatures.TextFeaturesBuilder simpleTextFeatures = TextFeatures.builder() .elementType(element.getType()) .boundingBox(Converter.toRectangle2D(bbox)) .text(element.getTextString()) .font(element.getGState().getFont().getType()) - .fontsize(element.getGState().getFontSize()) - .glyphs(extractGlyphInfo(element, includePathData)) - .build(); + .fontsize(element.getGState().getFontSize()); + + if (includeGlyphs) { + simpleTextFeatures.glyphs(extractGlyphInfo(element, preComputePathData)); + } + return simpleTextFeatures.build(); } } @@ -139,7 +136,7 @@ public class ElementFeatureFactory { @SneakyThrows - private List extractGlyphInfo(Element textElement, boolean includePathData) { + private List extractGlyphInfo(Element textElement, boolean precomputePathData) { assert textElement != null && textElement.getType() == Element.e_text; @@ -157,34 +154,29 @@ public class ElementFeatureFactory { } List glyphs = new ArrayList<>(); + short unitsPerEm = font.getUnitsPerEm(); + + try (CharIterator charIterator = textElement.getCharIterator(); Matrix2D ctm = textElement.getCTM().multiply(textElement.getTextMatrix());) { - try (CharIterator charIterator = textElement.getCharIterator()) { while (charIterator.hasNext()) { CharData charData = charIterator.next(); long charCode = charData.getCharCode(); - String glyphText = new String(font.mapToUnicode(charCode)); - if (Character.isWhitespace(glyphText.charAt(0))) { - continue; - } + try (Matrix2D fontMatrix = computeFontMatrix(charData, textElement, unitsPerEm)) { - try (Matrix2D fontMatrix = computeFontMatrix(charData, textElement, font); // - Matrix2D glyphMatrix = textElement.getCTM()// - .multiply(textElement.getTextMatrix())// - .multiply(fontMatrix)) { - PathData pathData = font.getGlyphPath(charCode, true, glyphMatrix); - if (pathData.getOperators().length == 1 && pathData.getOperators()[0] == 6) { - // This happens for some chinese characters or whitespaces, don't know why... - continue; + GlyphInfo glyph = GlyphInfo.builder() // + .charCode(charCode) // + .cachePathData(precomputePathData) // + .glyphMatrix(ctm.multiply(fontMatrix)) // + .font(font) // + .build(); + + glyphs.add(glyph); + + if (precomputePathData) { + // call the functions once to cache all data + glyph.getBoundingBox(); } - GeneralPath glyphPath = Converter.convertToGeneralPath(pathData); - GlyphInfo.GlyphInfoBuilder glyphInfo = GlyphInfo.builder().unicode(glyphText).bbox(glyphPath.getBounds2D()); - - if (includePathData) { - glyphInfo.pathData(pathData); - } - - glyphs.add(glyphInfo.build()); } } } @@ -194,9 +186,9 @@ public class ElementFeatureFactory { } - private Matrix2D computeFontMatrix(CharData charData, Element textElement, Font font) throws PDFNetException { + private Matrix2D computeFontMatrix(CharData charData, Element textElement, short unitsPerEm) throws PDFNetException { - double yScaleFactor = textElement.getGState().getFontSize() / font.getUnitsPerEm(); + double yScaleFactor = textElement.getGState().getFontSize() / unitsPerEm; double xScaleFactor = (textElement.getGState().getHorizontalScale() / 100) * yScaleFactor; return new Matrix2D(xScaleFactor, 0, 0, -yScaleFactor, charData.getGlyphX(), charData.getGlyphY()); diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatures.java b/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatures.java index ba28042..3622ee4 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatures.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/features/ElementFeatures.java @@ -94,4 +94,9 @@ public class ElementFeatures { return overlapShape.contains(ComparisonUtils.shrinkRectangle(boundingBox)); } + + public void destroy() { + // do nothing, except for text + } + } diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/features/GlyphInfo.java b/src/main/java/com/iqser/red/pdftronlogic/commons/features/GlyphInfo.java index 7d70804..c607dd8 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/features/GlyphInfo.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/features/GlyphInfo.java @@ -1,30 +1,40 @@ package com.iqser.red.pdftronlogic.commons.features; -import java.awt.Shape; import java.awt.geom.Rectangle2D; import java.util.Optional; import com.iqser.red.pdftronlogic.commons.ComparisonUtils; +import com.iqser.red.pdftronlogic.commons.Converter; +import com.iqser.red.pdftronlogic.commons.PDFNetUtils; +import com.pdftron.common.Matrix2D; +import com.pdftron.common.PDFNetException; +import com.pdftron.pdf.Font; import com.pdftron.pdf.PathData; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.Getter; import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; import lombok.experimental.FieldDefaults; +import lombok.extern.slf4j.Slf4j; +@Slf4j @Builder @AllArgsConstructor @RequiredArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) public class GlyphInfo { - @Getter - final String unicode; - @Getter - final Rectangle2D bbox; - final PathData pathData; + final Matrix2D glyphMatrix; + final long charCode; + final Font font; + + // in order to speed up invisible element removal, we only calculate the pathdata where necessary, as it is the costliest operation. + // It will only work as long as the associated ElementReader is still open, as the Font is bound to the ContentStream being read. + Rectangle2D bbox; + final boolean cachePathData; + PathData pathData; boolean overlapped; ElementFeatures overlappingElement; @@ -35,8 +45,12 @@ public class GlyphInfo { if (overlapped) { return true; } + Optional bbox = getBoundingBox(); + if (bbox.isEmpty()) { + return true; + } - if (overlappingElement.getOverlapShape().contains(ComparisonUtils.shrinkRectangle(bbox))) { + if (overlappingElement.getOverlapShape().contains(ComparisonUtils.shrinkRectangle(bbox.get()))) { overlapped = true; this.overlappingElement = overlappingElement; } @@ -46,25 +60,57 @@ public class GlyphInfo { } - public boolean matches(GlyphInfo glyph2) { + public String getUnicode() { - return unicode.equals(glyph2.unicode)// - && calculateIntersectedArea(glyph2.bbox, bbox) > 0.9 * Math.min(bbox.getWidth() * bbox.getHeight(), glyph2.bbox.getHeight() * glyph2.bbox.getWidth()); + try { + return new String(font.mapToUnicode(charCode)); + } catch (PDFNetException e) { + return ""; + } } + @SneakyThrows public Optional getPathData() { - return Optional.ofNullable(pathData); + if (pathData == null) { + + PDFNetUtils.requireFontNotClosed(font); + + PathData computedPathData = font.getGlyphPath(charCode, true, glyphMatrix); + if (computedPathData.getOperators().length == 1 && computedPathData.getOperators()[0] == 6) { + // This happens for some chinese characters or whitespaces, don't know why... + return Optional.empty(); + } + if (cachePathData) { + pathData = computedPathData; + } + return Optional.of(computedPathData); + } + return Optional.of(pathData); } - private static double calculateIntersectedArea(Rectangle2D r1, Rectangle2D r2) { + @SneakyThrows + public Optional getBoundingBox() { - double xOverlap = Math.max(0, Math.min(r1.getMaxX(), r2.getMaxX()) - Math.max(r1.getMinX(), r2.getMinX())); - double yOverlap = Math.max(0, Math.min(r1.getMaxY(), r2.getMaxY()) - Math.max(r1.getY(), r2.getY())); + if (bbox == null) { + Optional pathData = getPathData(); + if (pathData.isEmpty()) { + return Optional.empty(); + } + bbox = Converter.convertToGeneralPath(pathData.get()).getBounds2D(); + } + return Optional.of(bbox); + } - return xOverlap * yOverlap; + + @SneakyThrows + public void destroy() { + + if (glyphMatrix != null) { + glyphMatrix.close(); + } } } diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/features/TextFeatures.java b/src/main/java/com/iqser/red/pdftronlogic/commons/features/TextFeatures.java index f5be3ea..175311b 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/features/TextFeatures.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/features/TextFeatures.java @@ -5,8 +5,6 @@ import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.almostEqual; import java.util.ArrayList; import java.util.List; -import com.iqser.red.pdftronlogic.commons.ComparisonUtils; - import lombok.AccessLevel; import lombok.Builder; import lombok.EqualsAndHashCode; @@ -24,6 +22,7 @@ public class TextFeatures extends ElementFeatures { String text; int font; double fontsize; + @Builder.Default List glyphs = new ArrayList<>(); @@ -41,20 +40,6 @@ public class TextFeatures extends ElementFeatures { } - private boolean glyphsMatch(TextFeatures textFeaturesElement) { - - if (glyphs.size() != textFeaturesElement.getGlyphs().size()) { - return false; - } - for (int i = 0; i < glyphs.size(); i++) { - if (!glyphs.get(i).matches(textFeaturesElement.getGlyphs().get(i))) { - return false; - } - } - return true; - } - - public boolean testOverlapped(ElementFeatures overlappingElement) { if (glyphs.isEmpty()) { @@ -65,4 +50,11 @@ public class TextFeatures extends ElementFeatures { .allMatch(glyph -> glyph.testOverlapped(overlappingElement)); } + + @Override + public void destroy() { + + glyphs.forEach(GlyphInfo::destroy); + } + } diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/lookup/ElementFeatureLookup.java b/src/main/java/com/iqser/red/pdftronlogic/commons/lookup/ElementFeatureLookup.java index f2b2a05..8234a62 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/lookup/ElementFeatureLookup.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/lookup/ElementFeatureLookup.java @@ -1,15 +1,16 @@ package com.iqser.red.pdftronlogic.commons.lookup; import java.awt.geom.Rectangle2D; -import java.util.HashSet; +import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import java.util.Optional; -import java.util.Set; import java.util.function.Consumer; import java.util.function.Predicate; import com.iqser.red.pdftronlogic.commons.Converter; import com.iqser.red.pdftronlogic.commons.features.ElementFeatures; +import com.pdftron.pdf.Element; import com.pdftron.pdf.Rect; import lombok.AccessLevel; @@ -21,13 +22,13 @@ import lombok.experimental.FieldDefaults; @Getter @RequiredArgsConstructor @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) -public class ElementFeatureLookup { +public class ElementFeatureLookup implements AutoCloseable { /* - This class looks a bit weird and uses visitors since I tried to use the quadtree implementation by locationtech, since it uses Rectangles by default to query its data structure. - Unfortunately there were always edge cases, where it lost a few elements making it completely unusable. Further, it didn't even speed up the algorithm all that much. + This class looks a bit weird and uses visitors since I tried to use the quadtree implementation by locationtech, as it uses Rectangles by default to query its data structure. + Unfortunately there were always edge cases where it lost a few elements making it completely unusable. Further, it didn't even speed up the algorithm all that much. */ - Set allElements = new HashSet<>(); + List allElements = new ArrayList<>(); public void add(ElementFeatures elementFeatures) { @@ -69,9 +70,23 @@ public class ElementFeatureLookup { public List findOverlapped(ElementFeatures overlappingElement, boolean textOnly) { - OverlapVisitor overlapVisitor = new OverlapVisitor(overlappingElement, textOnly); - forEach(overlapVisitor::visitItem); - return overlapVisitor.getOverlappedElementFeatures(); + List overlappedElementFeatures = new LinkedList<>(); + + for (int i = 0; i < allElements.size(); i++) { + ElementFeatures features = allElements.get(i); + + if (textOnly && features.getElementType() != Element.e_text) { + continue; + } + + if (features.getBoundingBox().intersects(overlappingElement.getBoundingBox())) { + if (features.testOverlapped(overlappingElement)) { + overlappedElementFeatures.add(features); + } + } + } + + return overlappedElementFeatures; } @@ -101,13 +116,20 @@ public class ElementFeatureLookup { public void addAll(List currentOverlappedElements) { - currentOverlappedElements.forEach(this::add); + allElements.addAll(currentOverlappedElements); } public void removeAll(List currentOverlappedElements) { - currentOverlappedElements.forEach(this::remove); + allElements.removeAll(currentOverlappedElements); + } + + + @Override + public void close() { + + allElements.forEach(ElementFeatures::destroy); } } diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/lookup/OverlapVisitor.java b/src/main/java/com/iqser/red/pdftronlogic/commons/lookup/OverlapVisitor.java deleted file mode 100644 index 4d1fdbd..0000000 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/lookup/OverlapVisitor.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.iqser.red.pdftronlogic.commons.lookup; - -import java.util.LinkedList; -import java.util.List; - -import com.iqser.red.pdftronlogic.commons.ComparisonUtils; -import com.iqser.red.pdftronlogic.commons.features.ElementFeatures; -import com.pdftron.pdf.Element; - -import lombok.AccessLevel; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.experimental.FieldDefaults; - -@RequiredArgsConstructor -@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) -public class OverlapVisitor implements ElementFeatureVisitor { - - ElementFeatures overlappingElement; - - boolean textOnly; - - @Getter - List overlappedElementFeatures = new LinkedList<>(); - - - @Override - public void visitItem(ElementFeatures features) { - - if (textOnly && features.getElementType() != Element.e_text) { - return; - } - - if (ComparisonUtils.padRectangle(features.getBoundingBox()).intersects(ComparisonUtils.padRectangle(overlappingElement.getBoundingBox()))) { - if (features.testOverlapped(overlappingElement)) { - overlappedElementFeatures.add(features); - } - } - - } - -} diff --git a/src/test/java/com/iqser/red/pdftronlogic/commons/GlyphExtractionTest.java b/src/test/java/com/iqser/red/pdftronlogic/commons/GlyphExtractionTest.java index 068d75f..0b4a2ad 100644 --- a/src/test/java/com/iqser/red/pdftronlogic/commons/GlyphExtractionTest.java +++ b/src/test/java/com/iqser/red/pdftronlogic/commons/GlyphExtractionTest.java @@ -8,7 +8,9 @@ import java.io.FileOutputStream; import java.nio.file.Path; import java.util.List; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import com.iqser.red.pdftronlogic.commons.features.GlyphInfo; @@ -23,10 +25,11 @@ import com.pdftron.sdf.SDFDoc; import lombok.SneakyThrows; +@Disabled // makes no sense to run in pipeline public class GlyphExtractionTest { - @BeforeEach - void createService() { + @BeforeAll + static void init() { PDFNet.initialize(PDFTronConfig.license); } @@ -69,11 +72,11 @@ public class GlyphExtractionTest { for (GlyphInfo glyph : textFeatures.getGlyphs()) { - if (glyph.getPathData().isPresent()) { + if (glyph.getPathData().isPresent() && glyph.getBoundingBox().isPresent()) { drawPathData(glyph.getPathData().get(), builder, writer, Color.BLACK); + drawRect(ComparisonUtils.shrinkRectangle(glyph.getBoundingBox().get()), builder, writer, Color.RED); + drawRect(glyph.getBoundingBox().get(), builder, writer, Color.MAGENTA); } - drawRect(ComparisonUtils.shrinkRectangle(glyph.getBbox()), builder, writer, Color.RED); - drawRect(glyph.getBbox(), builder, writer, Color.MAGENTA); } } diff --git a/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java b/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java index a928f55..20f86b6 100644 --- a/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java +++ b/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java @@ -8,6 +8,7 @@ import java.io.FileOutputStream; import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -22,10 +23,16 @@ class InvisibleElementRemovalServiceTest { InvisibleElementRemovalService invisibleElementRemovalService; - @BeforeEach - void createService() { + @BeforeAll + static void init() { PDFNet.initialize(PDFTronConfig.license); + } + + + @BeforeEach + void createServices() { + invisibleElementRemovalService = new InvisibleElementRemovalService(); }