diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/LegacyPDFStreamEngine.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/LegacyPDFStreamEngine.java
new file mode 100644
index 00000000..4bc6d0a0
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/LegacyPDFStreamEngine.java
@@ -0,0 +1,377 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.iqser.red.service.redaction.v1.server.parsing;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.fontbox.ttf.TrueTypeFont;
+import org.apache.fontbox.util.BoundingBox;
+
+import org.apache.pdfbox.contentstream.PDFStreamEngine;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.font.encoding.GlyphList;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.font.PDCIDFont;
+import org.apache.pdfbox.pdmodel.font.PDCIDFontType2;
+import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
+import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
+import org.apache.pdfbox.pdmodel.font.PDType0Font;
+import org.apache.pdfbox.pdmodel.font.PDType3Font;
+import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
+import org.apache.pdfbox.text.TextPosition;
+import org.apache.pdfbox.util.Matrix;
+import org.apache.pdfbox.util.Vector;
+import org.apache.pdfbox.contentstream.operator.DrawObject;
+import org.apache.pdfbox.contentstream.operator.state.Concatenate;
+import org.apache.pdfbox.contentstream.operator.state.Restore;
+import org.apache.pdfbox.contentstream.operator.state.Save;
+import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
+import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
+import org.apache.pdfbox.contentstream.operator.text.BeginText;
+import org.apache.pdfbox.contentstream.operator.text.EndText;
+import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
+import org.apache.pdfbox.contentstream.operator.text.SetTextHorizontalScaling;
+import org.apache.pdfbox.contentstream.operator.text.ShowTextAdjusted;
+import org.apache.pdfbox.contentstream.operator.text.ShowTextLine;
+import org.apache.pdfbox.contentstream.operator.text.ShowTextLineAndSpace;
+import org.apache.pdfbox.contentstream.operator.text.MoveText;
+import org.apache.pdfbox.contentstream.operator.text.MoveTextSetLeading;
+import org.apache.pdfbox.contentstream.operator.text.NextLine;
+import org.apache.pdfbox.contentstream.operator.text.SetCharSpacing;
+import org.apache.pdfbox.contentstream.operator.text.SetTextLeading;
+import org.apache.pdfbox.contentstream.operator.text.SetTextRenderingMode;
+import org.apache.pdfbox.contentstream.operator.text.SetTextRise;
+import org.apache.pdfbox.contentstream.operator.text.SetWordSpacing;
+import org.apache.pdfbox.contentstream.operator.text.ShowText;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
+
+/**
+ * LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper.
+ *
+ * This class exists only so that we don't break the code of users who have their own subclasses of
+ * PDFTextStripper. It replaces the mostly empty implementation of showGlyph() in PDFStreamEngine
+ * with a heuristic implementation which is backwards compatible.
+ *
+ * DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
+ * THIS CODE IS DELIBERATELY INCORRECT, USE PDFStreamEngine INSTEAD.
+ */
+@SuppressWarnings({"PMD", "checkstyle:all"})
+class LegacyPDFStreamEngine extends PDFStreamEngine
+{
+    private static final Log LOG = LogFactory.getLog(LegacyPDFStreamEngine.class);
+
+    private int pageRotation; // rotation of the current page, assigned in processPage()
+    private PDRectangle pageSize; // crop box of the current page, assigned in processPage()
+    private Matrix translateMatrix; // translation by the negated crop box origin; null when that origin is (0, 0)
+    private final GlyphList glyphList; // glyph list built in the constructor, passed to font.toUnicode() in showGlyph()
+    private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>(); // cache of computeFontHeight() results, keyed by font.getCOSObject()
+
+    /**
+     * Registers the operators handled by this engine and loads the glyph list used for Unicode mapping.
+     */
+    LegacyPDFStreamEngine() throws IOException
+    {
+        addOperator(new BeginText());
+        addOperator(new Concatenate());
+        addOperator(new DrawObject()); // special text version
+        addOperator(new EndText());
+        addOperator(new SetGraphicsStateParameters());
+        addOperator(new Save());
+        addOperator(new Restore());
+        addOperator(new NextLine());
+        addOperator(new SetCharSpacing());
+        addOperator(new MoveText());
+        addOperator(new MoveTextSetLeading());
+        addOperator(new SetFontAndSize());
+        addOperator(new ShowText());
+        addOperator(new ShowTextAdjusted());
+        addOperator(new SetTextLeading());
+        addOperator(new SetMatrix());
+        addOperator(new SetTextRenderingMode());
+        addOperator(new SetTextRise());
+        addOperator(new SetWordSpacing());
+        addOperator(new SetTextHorizontalScaling());
+        addOperator(new ShowTextLine());
+        addOperator(new ShowTextLineAndSpace());
+
+        // load the additional glyph list for Unicode mapping; it is combined with the Adobe glyph list
+        String path = "/org/apache/pdfbox/resources/glyphlist/additional.txt";
+        InputStream input = GlyphList.class.getResourceAsStream(path); // NOTE(review): not closed in this constructor - confirm GlyphList closes it
+        glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
+    }
+
+    /**
+     * This will initialize and process the contents of the stream.
+     *
+     * @param page the page to process
+     * @throws java.io.IOException if there is an error accessing the stream.
+     */
+    @Override
+    public void processPage(PDPage page) throws IOException
+    {
+        this.pageRotation = page.getRotation();
+        this.pageSize = page.getCropBox();
+
+        if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0)
+        {
+            translateMatrix = null;
+        }
+        else
+        {
+            // translation matrix for cropbox
+            translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
+        }
+        super.processPage(page);
+    }
+
+    /**
+     * Called when a glyph is to be processed: builds a TextPosition from legacy heuristics (originally
+     * written by Ben Litchfield for PDFStreamEngine) and hands it to processTextPosition().
+     */
+    @Override
+    protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code,
+            String unicode, // not read in this method; the mapping is looked up again further down
+            Vector displacement)
+            throws IOException
+    {
+        //
+        // legacy calculations which were previously in PDFStreamEngine
+        //
+        //  DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
+        //  THIS CODE IS DELIBERATELY INCORRECT
+        //
+
+        PDGraphicsState state = getGraphicsState();
+        Matrix ctm = state.getCurrentTransformationMatrix();
+        float fontSize = state.getTextState().getFontSize();
+        float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f; // used as a plain factor below
+        Matrix textMatrix = getTextMatrix();
+
+        float displacementX = displacement.getX();
+        // the sorting algorithm is based on the width of the character. As the displacement
+        // for vertical characters does not provide any suitable value for it, we have to
+        // calculate our own
+        if (font.isVertical())
+        {
+            displacementX = font.getWidth(code) / 1000; // glyph width replaces the horizontal displacement
+            // there may be an additional scaling factor for true type fonts
+            TrueTypeFont ttf = null;
+            if (font instanceof PDTrueTypeFont)
+            {
+                ttf = ((PDTrueTypeFont)font).getTrueTypeFont();
+            }
+            else if (font instanceof PDType0Font)
+            {
+                PDCIDFont cidFont = ((PDType0Font)font).getDescendantFont();
+                if (cidFont instanceof PDCIDFontType2)
+                {
+                    ttf = ((PDCIDFontType2)cidFont).getTrueTypeFont();
+                }
+            }
+            if (ttf != null && ttf.getUnitsPerEm() != 1000)
+            {
+                displacementX *= 1000f / ttf.getUnitsPerEm(); // rescale when the font does not use 1000 units per em
+            }
+        }
+
+        //
+        // Second part of the legacy calculations: the translation of the following glyph in
+        // device space. It is derived from the displacement only and yields the glyph width below.
+        //
+        //  THIS CODE IS DELIBERATELY INCORRECT, see the warning at the top of this method.
+        //
+
+        // (modified) combined displacement, this is calculated *without* taking the character
+        // spacing and word spacing into account, due to legacy code in TextStripper
+        float tx = displacementX * fontSize * horizontalScaling;
+        float ty = displacement.getY() * fontSize;
+
+        // (modified) combined displacement matrix
+        Matrix td = Matrix.getTranslateInstance(tx, ty);
+
+        // (modified) text rendering matrix
+        Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space
+        float nextX = nextTextRenderingMatrix.getTranslateX();
+        float nextY = nextTextRenderingMatrix.getTranslateY();
+
+        // (modified) width and height calculations; the font height is cached per font dictionary
+        float dxDisplay = nextX - textRenderingMatrix.getTranslateX();
+        Float fontHeight = fontHeightMap.get(font.getCOSObject());
+        if (fontHeight == null)
+        {
+            fontHeight = computeFontHeight(font);
+            fontHeightMap.put(font.getCOSObject(), fontHeight);
+        }
+        float dyDisplay = fontHeight * textRenderingMatrix.getScalingFactorY();
+
+        //
+        // start of the original method
+        //
+
+        // Note on variable names. There are three different units being used in this code.
+        // Character sizes are given in glyph units, text locations are initially given in text
+        // units, and we want to save the data in display units. The variable names should end with
+        // Text or Disp to represent if the values are in text or disp units (no glyph units are
+        // saved).
+
+        float glyphSpaceToTextSpaceFactor = 1 / 1000f;
+        if (font instanceof PDType3Font)
+        {
+            glyphSpaceToTextSpaceFactor = font.getFontMatrix().getScaleX(); // Type 3 fonts supply their own font matrix
+        }
+
+        float spaceWidthText = 0;
+        try
+        {
+            // to avoid crash as described in PDFBOX-614, see what the space displacement should be
+            spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
+        }
+        catch (Throwable exception) // NOTE(review): also catches Errors; the failure is only logged
+        {
+            LOG.warn(exception, exception);
+        }
+
+        if (spaceWidthText == 0)
+        {
+            spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
+            // the average space width appears to be higher than necessary so make it smaller
+            spaceWidthText *= .80f;
+        }
+        if (spaceWidthText == 0)
+        {
+            spaceWidthText = 1.0f; // if could not find font, use a generic value
+        }
+
+        // the space width has to be transformed into display units
+        float spaceWidthDisplay = spaceWidthText * textRenderingMatrix.getScalingFactorX();
+
+        // use our additional glyph list for Unicode mapping
+        String unicodeMapping = font.toUnicode(code, glyphList);
+
+        // when there is no Unicode mapping available, Acrobat simply coerces the character code
+        // into Unicode, so we do the same. Subclasses of PDFStreamEngine do not necessarily want
+        // this, which is why we leave it until this point in this class.
+        if (unicodeMapping == null)
+        {
+            if (font instanceof PDSimpleFont)
+            {
+                char c = (char) code; // narrowing cast: only the low 16 bits of the code are kept
+                unicodeMapping = new String(new char[] { c });
+            }
+            else
+            {
+                // Acrobat doesn't seem to coerce composite font's character codes, instead it
+                // skips them. See the "allah2.pdf" TestTextStripper file.
+                return; // the glyph is dropped, processTextPosition() is not called for it
+            }
+        }
+
+        // adjust for cropbox if needed
+        Matrix translatedTextRenderingMatrix;
+        if (translateMatrix == null)
+        {
+            translatedTextRenderingMatrix = textRenderingMatrix;
+        }
+        else
+        {
+            translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
+            nextX -= pageSize.getLowerLeftX(); // apply the same crop box shift to the end position
+            nextY -= pageSize.getLowerLeftY();
+        }
+
+        processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
+                pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
+                Math.abs(dyDisplay), dxDisplay, // glyph height (sign removed) and width
+                Math.abs(spaceWidthDisplay), unicodeMapping, new int[] { code }, font,
+                fontSize,
+                (int)(fontSize * textMatrix.getScalingFactorX()))); // scaled font size, truncated by the int cast
+    }
+
+    /**
+     * Computes the height used for the glyphs of a font. Override this to use your own calculation.
+     *
+     * @param font the font.
+     * @return the glyph height, divided by 1000 or, for Type 3 fonts, mapped through the font matrix.
+     *
+     * @throws IOException if there is an error while getting the font bounding box.
+     */
+    protected float computeFontHeight(PDFont font) throws IOException
+    {
+        BoundingBox bbox = font.getBoundingBox();
+        if (bbox.getLowerLeftY() < Short.MIN_VALUE)
+        {
+            // PDFBOX-2158 and PDFBOX-3130: files by Salmat eSolutions / ClibPDF Library
+            // NOTE(review): this modifies the BoundingBox instance returned by the font - confirm it is not shared
+            bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536));
+        }
+        // half of the bbox height is used as the glyph height (todo: the reason is unknown)
+        float glyphHeight = bbox.getHeight() / 2;
+
+        // sometimes the bbox has very high values, but CapHeight is OK
+        PDFontDescriptor fontDescriptor = font.getFontDescriptor();
+        if (fontDescriptor != null)
+        {
+            float capHeight = fontDescriptor.getCapHeight();
+            if (Float.compare(capHeight, 0) != 0 &&
+                    (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0))
+            {
+                glyphHeight = capHeight; // a non-zero CapHeight wins when it is smaller or no height was found
+            }
+            // PDFBOX-3464, PDFBOX-4480, PDFBOX-4553:
+            // sometimes even CapHeight has very high value, but Ascent and Descent are ok
+            float ascent = fontDescriptor.getAscent();
+            float descent = fontDescriptor.getDescent();
+            if (capHeight > ascent && ascent > 0 && descent < 0 &&
+                    ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0))
+            {
+                glyphHeight = (ascent - descent) / 2; // half of the span between ascent and descent
+            }
+        }
+
+        // transformPoint from glyph space -> text space
+        float height;
+        if (font instanceof PDType3Font)
+        {
+            height = font.getFontMatrix().transformPoint(0, glyphHeight).y; // Type 3 fonts use their own font matrix
+        }
+        else
+        {
+            height = glyphHeight / 1000;
+        }
+
+        return height;
+    }
+
+    /**
+     * Hook that showGlyph() calls with the TextPosition it has built for a glyph. This
+     * implementation is empty; subclasses override it to collect or handle the text.
+     *
+     * @param text The text to be processed.
+     */
+    protected void processTextPosition(TextPosition text)
+    {
+        // subclasses can override to provide specific functionality
+    }
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java
index 95fdad6a..6e46257d 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java
@@ -36,7 +36,6 @@ import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
 import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
-import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;
 import org.apache.pdfbox.util.Matrix;
 
@@ -125,11 +124,9 @@ public class PDFLinesTextStripper extends PDFTextStripper {
 
                     // The direction of vertical lines must always be from bottom to top for the table extraction algorithm.
                     if (pos.getY() > path_y) {
-                        graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos
-                                .getY())));
+                        graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos.getY())));
                     } else {
-                        graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos
-                                .getX(), path_y)));
+                        graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos.getX(), path_y)));
                     }
 
                     path_x = (float) pos.getX();
@@ -150,25 +147,19 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                     Point2D p2 = transformPosition(x + width, y + height);
 
                     // Horizontal lines
-                    graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2
-                            .getX(), (float) p1.getY())));
-                    graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2
-                            .getX(), (float) p2.getY())));
+                    graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
+                    graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
 
                     // Vertical lines, direction must always be from bottom to top for the table extraction algorithm.
                     if (p2.getY() > p1.getY()) {
-                        graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2
-                                .getX(), (float) p2.getY())));
+                        graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
                     } else {
-                        graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2
-                                .getX(), (float) p1.getY())));
+                        graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
                     }
                     if (p2.getY() > p1.getY()) {
-                        graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1
-                                .getX(), (float) p2.getY())));
+                        graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1.getX(), (float) p2.getY())));
                     } else {
-                        graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1
-                                .getX(), (float) p1.getY())));
+                        graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1.getX(), (float) p1.getY())));
                     }
                 }
                 break;
@@ -211,8 +202,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                 PDImageXObject image = (PDImageXObject) xobject;
                 Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
 
-                Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew
-                        .getScaleY());
+                Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew.getScaleY());
 
                 // Memory Hack - sofReference kills me
                 FieldUtils.writeField(image, "cachedImageSubsampling", -1, true);
@@ -295,9 +285,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
 
             if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
                     .getUnicode()
-                    .equals("\u00A0") || textPositions.get(i)
-                    .getUnicode()
-                    .equals("\t"))) {
+                    .equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
                 startIndex++;
                 continue;
             }
@@ -307,7 +295,9 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                 List<TextPosition> sublist = textPositions.subList(startIndex, i);
                 if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
                         .getUnicode()
-                        .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
+                        .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
+                        .getUnicode()
+                        .equals("\t")))) {
                     textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
                 }
                 startIndex = i;
@@ -318,7 +308,9 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                 List<TextPosition> sublist = textPositions.subList(startIndex, i);
                 if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
                         .getUnicode()
-                        .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
+                        .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
+                        .getUnicode()
+                        .equals("\t")))) {
                     textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
                 }
                 startIndex = i;
@@ -332,7 +324,9 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                 List<TextPosition> sublist = textPositions.subList(startIndex, i);
                 if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
                         .getUnicode()
-                        .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
+                        .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
+                        .getUnicode()
+                        .equals("\t")))) {
 
                     // Remove false sequence ends (whitespaces)
                     if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
@@ -351,15 +345,15 @@ public class PDFLinesTextStripper extends PDFTextStripper {
         List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
         if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1)
                 .getUnicode()
-                .equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0") || sublist.get(sublist.size() - 1).getUnicode().equals("\t"))) {
+                .equals(" ") || sublist.get(sublist.size() - 1)
+                .getUnicode()
+                .equals("\u00A0") || sublist.get(sublist.size() - 1).getUnicode().equals("\t"))) {
             sublist = sublist.subList(0, sublist.size() - 1);
         }
 
         if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0)
                 .getUnicode()
-                .equals("\u00A0") || sublist.get(0)
-                .getUnicode()
-                .equals("\t")))) {
+                .equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
             if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
                     .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
                 for (TextPosition t : sublist) {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFTextStripper.java
new file mode 100644
index 00000000..d6430715
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFTextStripper.java
@@ -0,0 +1,2182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.iqser.red.service.redaction.v1.server.parsing;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.text.Bidi;
+import java.text.Normalizer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.StringTokenizer;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageTree;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
+import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;
+import org.apache.pdfbox.text.TextPosition;
+import org.apache.pdfbox.text.TextPositionComparator;
+import org.apache.pdfbox.util.QuickSort;
+
+/**
+ * This is a copy of the PDFBox original; only lines 594-607 were adjusted because of a bug in PDFBox.
+ * See S416.pdf.
+ * */
+@SuppressWarnings({"PMD", "checkstyle:all"})
+public class PDFTextStripper extends LegacyPDFStreamEngine
+{
+
+    private static float defaultIndentThreshold = 2.0f;
+    private static float defaultDropThreshold = 2.5f;
+    private static final boolean useCustomQuickSort;
+
+    private static final Log LOG = LogFactory.getLog(PDFTextStripper.class);
+
+    // The default indent/drop thresholds can be overridden with -D system
+    // properties named after the lower-cased simple class name:
+    // pdftextstripper.indent
+    // pdftextstripper.drop
+    // (a value that is missing, empty or not a float leaves the default untouched)
+    static
+    {
+        String indentValue = null;
+        String dropValue = null;
+        try
+        {
+            String prefix = PDFTextStripper.class.getSimpleName().toLowerCase();
+            indentValue = System.getProperty(prefix + ".indent");
+            dropValue = System.getProperty(prefix + ".drop");
+        }
+        catch (SecurityException e)
+        {
+            // PDFBOX-1946 when run in an applet:
+            // the properties cannot be read, so keep the built-in defaults
+        }
+        if (indentValue != null && !indentValue.isEmpty())
+        {
+            try
+            {
+                defaultIndentThreshold = Float.parseFloat(indentValue);
+            }
+            catch (NumberFormatException nfe)
+            {
+                // not a number: keep the built-in default
+            }
+        }
+        if (dropValue != null && !dropValue.isEmpty())
+        {
+            try
+            {
+                defaultDropThreshold = Float.parseFloat(dropValue);
+            }
+            catch (NumberFormatException nfe)
+            {
+                // not a number: keep the built-in default
+            }
+        }
+    }
+    
+    static
+    {
+        // Check if we need to use the custom quicksort algorithm as a workaround
+        // to the PDFBOX-1512 transitivity issue of TextPositionComparator.
+        // The custom sort is skipped only when the runtime reports a
+        // specification version of 1.6 or lower.
+        boolean legacyRuntime = false;
+        try
+        {
+            String version = System.getProperty("java.specification.version");
+            StringTokenizer parts = new StringTokenizer(version, ".");
+            int major = Integer.parseInt(parts.nextToken());
+            // a version without a second component counts as minor version 0
+            int minor = parts.hasMoreTokens() ? Integer.parseInt(parts.nextToken()) : 0;
+            legacyRuntime = major == 1 && minor <= 6;
+        }
+        catch (SecurityException x)
+        {
+            // when run in an applet ignore and use default:
+            // assume 1.7 or higher so that quicksort is used
+        }
+        catch (NumberFormatException nfe)
+        {
+            // should never happen, but if it does,
+            // assume 1.7 or higher so that quicksort is used
+        }
+        // stays true whenever one of the handlers above was taken
+        useCustomQuickSort = !legacyRuntime;
+    }
+
+    /**
+     * The platform's line separator, read from the "line.separator" system property.
+     */
+    protected final String LINE_SEPARATOR = System.getProperty("line.separator");
+
+    private String lineSeparator = LINE_SEPARATOR;
+    private String wordSeparator = " ";
+    private String paragraphStart = "";
+    private String paragraphEnd = "";
+    private String pageStart = "";
+    private String pageEnd = LINE_SEPARATOR;
+    private String articleStart = "";
+    private String articleEnd = "";
+
+    private int currentPageNo = 0;
+    private int startPage = 1;
+    private int endPage = Integer.MAX_VALUE;
+    private PDOutlineItem startBookmark = null;
+
+    // 1-based bookmark page numbers; -1 means undefined
+    private int startBookmarkPageNumber = -1;
+    private int endBookmarkPageNumber = -1;
+
+    private PDOutlineItem endBookmark = null;
+    private boolean suppressDuplicateOverlappingText = true;
+    private boolean shouldSeparateByBeads = true;
+    private boolean sortByPosition = false;
+    private boolean addMoreFormatting = false;
+
+    private float indentThreshold = defaultIndentThreshold;
+    private float dropThreshold = defaultDropThreshold;
+
+    // PDFs do not always store spaces; these tolerances are used to guess where to add them
+    private float spacingTolerance = .5f;
+    private float averageCharTolerance = .3f;
+
+    private List<PDRectangle> beadRectangles = null;
+
+    /**
+     * The charactersByArticle is used to extract text by article divisions. For example a PDF that has two columns like
+     * a newspaper, we want to extract the first column and then the second column. In this example the PDF would have 2
+     * beads (or articles), one for each column. The size of the charactersByArticle would be 5, because not all text on
+     * the screen will fall into one of the articles. The five divisions are shown below
+     *
+     * Text before first article
+     * first article text
+     * text between first article and second article
+     * second article text
+     * text after second article
+     *
+     * Most PDFs won't have any beads, so charactersByArticle will contain a single entry.
+     */
+    protected ArrayList<List<TextPosition>> charactersByArticle = new ArrayList<List<TextPosition>>();
+
+    private Map<String, TreeMap<Float, TreeSet<Float>>> characterListMapping = new HashMap<String, TreeMap<Float, TreeSet<Float>>>();
+
+    protected PDDocument document;
+    protected Writer output;
+
+    /**
+     * True if we started a paragraph but haven't ended it yet.
+     */
+    private boolean inParagraph;
+
+    /**
+     * Instantiate a new PDFTextStripper object. The constructor body itself is empty.
+     *
+     * @throws IOException presumably propagated from the superclass constructor - TODO confirm.
+     */
+    public PDFTextStripper() throws IOException
+    {
+    }
+
+    /**
+     * Returns the text of a document by running {@link #writeText(PDDocument, Writer)} into an in-memory writer. <br>
+     * NOTE: The document must not be encrypted when coming into this method.
+     *
+     * <p>IMPORTANT: By default, text extraction is done in the same sequence as the text in the PDF page content stream.
+     * PDF is a graphic format, not a text format, and unlike HTML, it has no requirement that the text on a page
+     * be rendered in a certain order. The order is the one that was determined by the software that created the
+     * PDF. To get text sorted from left to right and top to bottom, use {@link #setSortByPosition(boolean)}.
+     *
+     * @param doc The document to get the text from.
+     * @return The text of the PDF document.
+     * @throws IOException if the doc state is invalid or it is encrypted.
+     */
+    public String getText(PDDocument doc) throws IOException
+    {
+        StringWriter buffer = new StringWriter();
+        writeText(doc, buffer);
+        return buffer.toString();
+    }
+
+    private void resetEngine()
+    {
+        document = null; // forget the document of the previous run
+        currentPageNo = 0; // processPages increments this before each page
+        characterListMapping.clear();
+        if (charactersByArticle != null)
+        {
+            charactersByArticle.clear();
+        }
+    }
+
+    /**
+     * Resets the engine state, then extracts the text of the given document and writes it to the given writer.
+     *
+     * @param doc The document to get the data from.
+     * @param outputStream The location to put the text.
+     * @throws IOException If the doc is in an invalid state.
+     */
+    public void writeText(PDDocument doc, Writer outputStream) throws IOException
+    {
+        resetEngine();
+        document = doc;
+        output = outputStream;
+        if (getAddMoreFormatting())
+        {
+            String lineBreak = lineSeparator;
+            articleStart = lineBreak;
+            articleEnd = lineBreak;
+            pageStart = lineBreak;
+            paragraphEnd = lineBreak;
+        }
+        startDocument(document);
+        processPages(document.getPages());
+        endDocument(document);
+    }
+
+    /**
+     * Walks over every page of the page tree and hands the pages that have contents to
+     * {@link #processPage(PDPage)}. Before the walk, the start and end bookmarks (if set)
+     * are resolved to 1-based page numbers; -1 marks a bookmark that is unset or whose
+     * destination page is null.
+     *
+     * @param pages The pages object in the document.
+     *
+     * @throws IOException If there is an error parsing the text.
+     */
+    protected void processPages(PDPageTree pages) throws IOException
+    {
+        // resolve the start bookmark to a 1-based page number, -1 = undefined
+        PDPage startBookmarkPage = null;
+        if (startBookmark != null)
+        {
+            startBookmarkPage = startBookmark.findDestinationPage(document);
+        }
+        startBookmarkPageNumber = startBookmarkPage != null
+                ? pages.indexOf(startBookmarkPage) + 1
+                : -1;
+
+        // resolve the end bookmark the same way
+        PDPage endBookmarkPage = null;
+        if (endBookmark != null)
+        {
+            endBookmarkPage = endBookmark.findDestinationPage(document);
+        }
+        endBookmarkPageNumber = endBookmarkPage != null
+                ? pages.indexOf(endBookmarkPage) + 1
+                : -1;
+
+        boolean bothBookmarksUnresolved = startBookmark != null && endBookmark != null
+                && startBookmarkPageNumber == -1 && endBookmarkPageNumber == -1;
+        if (bothBookmarksUnresolved
+                && startBookmark.getCOSObject() == endBookmark.getCOSObject())
+        {
+            // this is a special case where both the start and end bookmark
+            // are the same but point to nothing. In this case
+            // we will not extract any text.
+            startBookmarkPageNumber = 0;
+            endBookmarkPageNumber = 0;
+        }
+
+        for (PDPage page : pages)
+        {
+            currentPageNo++;
+            if (page.hasContents())
+            {
+                processPage(page);
+            }
+        }
+    }
+
+    /**
+     * Hook for subclasses; writeText calls it after the engine state has been reset and before any page is processed.
+     *
+     * @param document The PDF document that is being processed.
+     * @throws IOException If an IO error occurs.
+     */
+    protected void startDocument(PDDocument document) throws IOException
+    {
+        // no default implementation, but available for subclasses
+    }
+
+    /**
+     * Hook for subclasses; writeText calls it after the processing of all pages of the document
+     * has finished.
+     *
+     * @param document The PDF document that is being processed.
+     * @throws IOException If an IO error occurs.
+     */
+    protected void endDocument(PDDocument document) throws IOException
+    {
+        // no default implementation, but available for subclasses
+    }
+
+    /**
+     * This will process the contents of a page. Pages outside the configured page range or
+     * outside the resolved bookmark range are skipped without any callback being invoked.
+     *
+     * @param page The page to process.
+     *
+     * @throws IOException If there is an error processing the page.
+     */
+    @Override
+    public void processPage(PDPage page) throws IOException
+    {
+        boolean inPageRange = currentPageNo >= startPage && currentPageNo <= endPage;
+        boolean afterStartBookmark = startBookmarkPageNumber == -1
+                || currentPageNo >= startBookmarkPageNumber;
+        boolean beforeEndBookmark = endBookmarkPageNumber == -1
+                || currentPageNo <= endBookmarkPageNumber;
+        if (!inPageRange || !afterStartBookmark || !beforeEndBookmark)
+        {
+            return;
+        }
+
+        startPage(page);
+
+        // the first section always exists; each bead rectangle adds two more
+        int numberOfArticleSections = 1;
+        if (shouldSeparateByBeads)
+        {
+            fillBeadRectangles(page);
+            numberOfArticleSections += beadRectangles.size() * 2;
+        }
+        charactersByArticle.ensureCapacity(numberOfArticleSections);
+
+        // empty the lists left over from the previous page (surplus lists are kept, just emptied) ...
+        for (List<TextPosition> article : charactersByArticle)
+        {
+            article.clear();
+        }
+        // ... and append fresh ones until there is one list per article section
+        while (charactersByArticle.size() < numberOfArticleSections)
+        {
+            charactersByArticle.add(new ArrayList<TextPosition>());
+        }
+
+        // the duplicate-text bookkeeping is restarted for every page
+        characterListMapping.clear();
+        super.processPage(page);
+        writePage();
+        endPage(page);
+    }
+
+    private void fillBeadRectangles(PDPage page)
+    {
+        beadRectangles = new ArrayList<PDRectangle>();
+        for (PDThreadBead bead : page.getThreadBeads())
+        {
+            if (bead == null || bead.getRectangle() == null)
+            {
+                // can't skip, because of null entry handling in processTextPosition()
+                beadRectangles.add(null);
+                continue;
+            }
+            PDRectangle area = bead.getRectangle();
+
+            // bead rectangle is in PDF coordinates (y=0 is bottom), glyphs are in
+            // image coordinates (y=0 is top), so mirror both y values at the media box top
+            float pageTop = page.getMediaBox().getUpperRightY();
+            float flippedLowerLeftY = pageTop - area.getUpperRightY();
+            float flippedUpperRightY = pageTop - area.getLowerLeftY();
+            area.setLowerLeftY(flippedLowerLeftY);
+            area.setUpperRightY(flippedUpperRightY);
+
+            // adjust for cropbox: shift by its lower left corner when that is not the origin
+            PDRectangle cropBox = page.getCropBox();
+            float offsetX = cropBox.getLowerLeftX();
+            float offsetY = cropBox.getLowerLeftY();
+            if (offsetX != 0 || offsetY != 0)
+            {
+                area.setLowerLeftX(area.getLowerLeftX() - offsetX);
+                area.setLowerLeftY(area.getLowerLeftY() - offsetY);
+                area.setUpperRightX(area.getUpperRightX() - offsetX);
+                area.setUpperRightY(area.getUpperRightY() - offsetY);
+            }
+
+            beadRectangles.add(area);
+        }
+    }
+
+    /**
+     * Start a new article, which is typically defined as a column on a single page (also referred to as a bead). This
+     * assumes that the primary direction of text is left to right: the default implementation delegates to
+     * startArticle(true). Subclasses may provide additional information.
+     *
+     * @throws IOException If there is any error writing to the stream.
+     */
+    protected void startArticle() throws IOException
+    {
+        startArticle(true);
+    }
+
+    /**
+     * Start a new article, which is typically defined as a column on a single page (also referred to as a bead).
+     * Default implementation writes the value of getArticleStart() to the output. Subclasses may do more.
+     *
+     * @param isLTR true if primary direction of text is left to right; not used by this default implementation.
+     * @throws IOException If there is any error writing to the stream.
+     */
+    protected void startArticle(boolean isLTR) throws IOException
+    {
+        output.write(getArticleStart());
+    }
+
+    /**
+     * End an article. Default implementation writes the value of getArticleEnd() to the output.
+     *
+     * @throws IOException If there is any error writing to the stream.
+     */
+    protected void endArticle() throws IOException
+    {
+        output.write(getArticleEnd());
+    }
+
+    /**
+     * Start a new page; called by processPage before the page's text is collected. Default is to do nothing.
+     *
+     * @param page The page we are about to process.
+     *
+     * @throws IOException If there is any error writing to the stream.
+     */
+    protected void startPage(PDPage page) throws IOException
+    {
+        // default is to do nothing
+    }
+
+    /**
+     * End a page; called by processPage after the page's text has been written. Default is to do nothing.
+     *
+     * @param page The page that has just been processed.
+     *
+     * @throws IOException If there is any error writing to the stream.
+     */
+    protected void endPage(PDPage page) throws IOException
+    {
+        // default is to do nothing
+    }
+
+    private static final float END_OF_LAST_TEXT_X_RESET_VALUE = -1;
+    private static final float MAX_Y_FOR_LINE_RESET_VALUE = -Float.MAX_VALUE;
+    private static final float EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE = -Float.MAX_VALUE;
+    private static final float MAX_HEIGHT_FOR_LINE_RESET_VALUE = -1;
+    private static final float MIN_Y_TOP_FOR_LINE_RESET_VALUE = Float.MAX_VALUE;
+    private static final float LAST_WORD_SPACING_RESET_VALUE = -1;
+
+    /**
+     * This will print the text of the processed page to "output". It will estimate, based on the coordinates of the
+     * text, where newlines and word spacings should be placed. The text is sorted only if sorting by position is
+     * enabled; the direction-adjusted coordinates are used for the layout estimates in either case.
+     *
+     * @throws IOException If there is an error writing the text.
+     */
+    protected void writePage() throws IOException
+    {
+        float maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
+        float minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
+        float endOfLastTextX = END_OF_LAST_TEXT_X_RESET_VALUE;
+        float lastWordSpacing = LAST_WORD_SPACING_RESET_VALUE;
+        float maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
+        PositionWrapper lastPosition = null;
+        PositionWrapper lastLineStartPosition = null;
+
+        boolean startOfPage = true; // flag to indicate start of page
+        boolean startOfArticle;
+        if (charactersByArticle.size() > 0)
+        {
+            writePageStart();
+        }
+
+        for (List<TextPosition> textList : charactersByArticle)
+        {
+            if (getSortByPosition())
+            {
+                TextPositionComparator comparator = new TextPositionComparator();
+
+                // because the TextPositionComparator is not transitive, but
+                // JDK7+ enforces transitivity on comparators, we need to use
+                // a custom quicksort implementation (which is slower, unfortunately).
+                if (useCustomQuickSort)
+                {
+                    QuickSort.sort(textList, comparator);
+                }
+                else
+                {
+                    Collections.sort(textList, comparator);
+                }
+            }
+
+            startArticle();
+            startOfArticle = true;
+
+            // Now cycle through to print the text.
+            // We queue up a line at a time before we print so that we can convert
+            // the line from presentation form to logical form (if needed).
+            List<LineItem> line = new ArrayList<LineItem>();
+
+            Iterator<TextPosition> textIter = textList.iterator();
+            // PDF files don't always store spaces. We will need to guess where we should add
+            // spaces based on the distances between TextPositions. Historically, this was done
+            // based on the size of the space character provided by the font. In general, this
+            // worked but there were cases where it did not work. Calculating the average character
+            // width and using that as a metric works better in some cases but fails in some cases
+            // where the spacing worked. So we use both. NOTE: Adobe reader also fails on some of
+            // these examples.
+
+            // Keeps track of the previous average character width
+            float previousAveCharWidth = -1;
+            while (textIter.hasNext())
+            {
+                TextPosition position = textIter.next();
+                PositionWrapper current = new PositionWrapper(position);
+                String characterValue = position.getUnicode();
+
+                // Resets the average character width when we see a change in font
+                // or a change in the font size
+                if (lastPosition != null && (position.getFont() != lastPosition.getTextPosition()
+                        .getFont()
+                        || position.getFontSize() != lastPosition.getTextPosition().getFontSize()))
+                {
+                    previousAveCharWidth = -1;
+                }
+
+                float positionX;
+                float positionY;
+                float positionWidth;
+                float positionHeight;
+
+
+                // The text direction adjusted coordinates are always used here, not only when sorting
+                // (the sort-dependent branch is commented out; presumably the adjustment noted on the class).
+//                if (getSortByPosition())
+//                {
+                    positionX = position.getXDirAdj();
+                    positionY = position.getYDirAdj();
+                    positionWidth = position.getWidthDirAdj();
+                    positionHeight = position.getHeightDir();
+//                }
+//                else
+//                {
+//                    positionX = position.getX();
+//                    positionY = position.getY();
+//                    positionWidth = position.getWidth();
+//                    positionHeight = position.getHeight();
+//                }
+
+                // The number of individual character widths in this text position
+                int wordCharCount = position.getIndividualWidths().length;
+
+                // Estimate the expected width of the space based on the
+                // space character with some margin.
+                float wordSpacing = position.getWidthOfSpace();
+                float deltaSpace;
+                if (wordSpacing == 0 || Float.isNaN(wordSpacing))
+                {
+                    deltaSpace = Float.MAX_VALUE;
+                }
+                else
+                {
+                    if (lastWordSpacing < 0)
+                    {
+                        deltaSpace = wordSpacing * getSpacingTolerance();
+                    }
+                    else
+                    {
+                        deltaSpace = (wordSpacing + lastWordSpacing) / 2f * getSpacingTolerance();
+                    }
+                }
+
+                // Estimate the expected width of the space based on the average character width
+                // with some margin. This calculation does not make a true average (average of
+                // averages) but we found that it gave the best results after numerous experiments.
+                // Based on experiments we also found that .3 worked well.
+                float averageCharWidth;
+                if (previousAveCharWidth < 0)
+                {
+                    averageCharWidth = positionWidth / wordCharCount;
+                }
+                else
+                {
+                    averageCharWidth = (previousAveCharWidth + positionWidth / wordCharCount) / 2f;
+                }
+                float deltaCharWidth = averageCharWidth * getAverageCharTolerance();
+
+                // Compares the values obtained by the average method and the wordSpacing method
+                // and picks the smaller number.
+                float expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
+                if (endOfLastTextX != END_OF_LAST_TEXT_X_RESET_VALUE)
+                {
+                    expectedStartOfNextWordX = endOfLastTextX + Math.min(deltaSpace, deltaCharWidth);
+                }
+
+                if (lastPosition != null)
+                {
+                    if (startOfArticle)
+                    {
+                        lastPosition.setArticleStart();
+                        startOfArticle = false;
+                    }
+                    // RDD - Here we determine whether this text object is on the current
+                    // line. We use the lastBaselineFontSize to handle the superscript
+                    // case, and the size of the current font to handle the subscript case.
+                    // Text must overlap with the last rendered baseline text by at least
+                    // a small amount in order to be considered as being on the same line.
+
+                    // XXX BC: In theory, this check should really check if the next char is in
+                    // full range seen in this line. This is what I tried to do with minYTopForLine,
+                    // but this caused a lot of regression test failures. So, I'm leaving it be for
+                    // now
+                    if (!overlap(positionY, positionHeight, maxYForLine, maxHeightForLine))
+                    {
+                        writeLine(normalize(line));
+                        line.clear();
+                        lastLineStartPosition = handleLineSeparation(current, lastPosition,
+                                lastLineStartPosition, maxHeightForLine);
+                        expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
+                        maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
+                        maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
+                        minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
+                    }
+                    // test if our TextPosition starts after a new word would be expected to start
+                    if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE
+                            && expectedStartOfNextWordX < positionX
+                            // only bother adding a word separator if the last character was not a word separator
+                            && (wordSeparator.isEmpty() || //
+                                    (lastPosition.getTextPosition().getUnicode() != null
+                                            && !lastPosition.getTextPosition().getUnicode()
+                                                    .endsWith(wordSeparator))))
+                    {
+                        line.add(LineItem.getWordSeparator());
+                    }
+                    // if there is at least the equivalent of one space
+                    // between the last character and the current one,
+                    // reset the max line height as the font size may have completely changed
+                    if (Math.abs(position.getX()
+                            - lastPosition.getTextPosition().getX()) > (wordSpacing + deltaSpace))
+                    {
+                        maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
+                        maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
+                        minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
+                    }
+                }
+                if (positionY >= maxYForLine)
+                {
+                    maxYForLine = positionY;
+                }
+                // RDD - endX is what PDF considers to be the x coordinate of the
+                // end position of the text. We use it in computing our metrics below.
+                endOfLastTextX = positionX + positionWidth;
+
+                // add it to the list
+                if (characterValue != null)
+                {
+                    if (startOfPage && lastPosition == null)
+                    {
+                        writeParagraphStart();// not sure this is correct for RTL?
+                    }
+                    line.add(new LineItem(position));
+                }
+                maxHeightForLine = Math.max(maxHeightForLine, positionHeight);
+                minYTopForLine = Math.min(minYTopForLine, positionY - positionHeight);
+                lastPosition = current;
+                if (startOfPage)
+                {
+                    lastPosition.setParagraphStart();
+                    lastPosition.setLineStart();
+                    lastLineStartPosition = lastPosition;
+                    startOfPage = false;
+                }
+                lastWordSpacing = wordSpacing;
+                previousAveCharWidth = averageCharWidth;
+            }
+            // print the final line
+            if (line.size() > 0)
+            {
+                writeLine(normalize(line));
+                writeParagraphEnd();
+            }
+            endArticle();
+        }
+        writePageEnd();
+    }
+
+    private boolean overlap(float y1, float height1, float y2, float height2)
+    {
+        return within(y1, y2, .1f) || (y1 - height1 <= y2 && y2 <= y1)
+                || (y2 - height2 <= y1 && y1 <= y2); // one y lies within the other's height range
+    }
+
+    /**
+     * Write the line separator value, as returned by getLineSeparator(), to the output stream.
+     *
+     * @throws IOException If there is a problem writing out the line separator to the document.
+     */
+    protected void writeLineSeparator() throws IOException
+    {
+        output.write(getLineSeparator());
+    }
+
+    /**
+     * Write the word separator value, as returned by getWordSeparator(), to the output stream.
+     *
+     * @throws IOException If there is a problem writing out the word separator to the document.
+     */
+    protected void writeWordSeparator() throws IOException
+    {
+        output.write(getWordSeparator());
+    }
+
+    /**
+     * Write the Unicode string of the given TextPosition to the output stream.
+     *
+     * @param text The text to write to the stream.
+     * @throws IOException If there is an error when writing the text.
+     */
+    protected void writeCharacters(TextPosition text) throws IOException
+    {
+        output.write(text.getUnicode());
+    }
+
+    /**
+     * Write a Java string to the output stream. The default implementation ignores the <code>textPositions</code>
+     * and just calls {@link #writeString(String)}.
+     *
+     * @param text The text to write to the stream.
+     * @param textPositions The TextPositions belonging to the text.
+     * @throws IOException If there is an error when writing the text.
+     */
+    protected void writeString(String text, List<TextPosition> textPositions) throws IOException
+    {
+        writeString(text);
+    }
+
+    /**
+     * Write a Java string unchanged to the output stream.
+     *
+     * @param text The text to write to the stream.
+     * @throws IOException If there is an error when writing the text.
+     */
+    protected void writeString(String text) throws IOException
+    {
+        output.write(text);
+    }
+
+    /**
+     * This will determine of two floating point numbers are within a specified variance.
+     *
+     * @param first The first number to compare to.
+     * @param second The second number to compare to.
+     * @param variance The allowed variance.
+     */
+    private boolean within(float first, float second, float variance)
+    {
+        return second < first + variance && second > first - variance;
+    }
+
+    /**
+     * Processes one TextPosition and adds it to the per-article character lists of the current page.
+     * <p>
+     * When duplicate suppression is enabled, a glyph is dropped if the same unicode string has already been
+     * recorded at (nearly) the same x/y position. A glyph that is kept is then assigned to an article division
+     * (derived from the bead rectangles when separation by beads is enabled, otherwise division 0) and is either
+     * appended to that division's list or merged with the last entry when one of the two is a diacritic.
+     * </p>
+     *
+     * @param text The text to process.
+     */
+    @Override
+    protected void processTextPosition(TextPosition text)
+    {
+        boolean showCharacter = true;
+        if (suppressDuplicateOverlappingText)
+        {
+            // assume the glyph is a duplicate until the lookup below proves otherwise
+            showCharacter = false;
+            String textCharacter = text.getUnicode();
+            float textX = text.getX();
+            float textY = text.getY();
+            // positions already seen for this exact unicode string: x -> set of y values
+            TreeMap<Float, TreeSet<Float>> sameTextCharacters = characterListMapping
+                    .get(textCharacter);
+            if (sameTextCharacters == null)
+            {
+                sameTextCharacters = new TreeMap<Float, TreeSet<Float>>();
+                characterListMapping.put(textCharacter, sameTextCharacters);
+            }
+            // RDD - Here we compute the value that represents the end of the rendered
+            // text. This value is used to determine whether subsequent text rendered
+            // on the same line overwrites the current text.
+            //
+            // We subtract any positive padding to handle cases where extreme amounts
+            // of padding are applied, then backed off (not sure why this is done, but there
+            // are cases where the padding is on the order of 10x the character width, and
+            // the TJ just backs up to compensate after each character). Also, we subtract
+            // an amount to allow for kerning (a percentage of the width of the last
+            // character).
+            //
+            // NOTE(review): the comment above does not match the code that follows, which only
+            // derives a tolerance of one third of the average per-character width and looks for
+            // an already recorded position within that tolerance - confirm and consider rewording.
+            boolean suppressCharacter = false;
+            float tolerance = text.getWidth() / textCharacter.length() / 3.0f;
+
+            // candidates whose x lies in [textX - tolerance, textX + tolerance)
+            SortedMap<Float, TreeSet<Float>> xMatches = sameTextCharacters.subMap(textX - tolerance,
+                    textX + tolerance);
+            for (TreeSet<Float> xMatch : xMatches.values())
+            {
+                // among those, any y in [textY - tolerance, textY + tolerance) marks a duplicate
+                SortedSet<Float> yMatches = xMatch.subSet(textY - tolerance, textY + tolerance);
+                if (!yMatches.isEmpty())
+                {
+                    suppressCharacter = true;
+                    break;
+                }
+            }
+            if (!suppressCharacter)
+            {
+                // first occurrence at this position: remember it so later repeats are suppressed
+                TreeSet<Float> ySet = sameTextCharacters.get(textX);
+                if (ySet == null)
+                {
+                    ySet = new TreeSet<Float>();
+                    sameTextCharacters.put(textX, ySet);
+                }
+                ySet.add(textY);
+                showCharacter = true;
+            }
+        }
+        if (showCharacter)
+        {
+            // if we are showing the character then we need to determine which article it belongs to
+            int foundArticleDivisionIndex = -1;
+            int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1;
+            int notFoundButFirstLeftArticleDivisionIndex = -1;
+            int notFoundButFirstAboveArticleDivisionIndex = -1;
+            float x = text.getX();
+            float y = text.getY();
+            if (shouldSeparateByBeads)
+            {
+                // scan the bead rectangles until one contains the position; a containing bead i maps
+                // to division i * 2 + 1, the fallback candidates map to division i * 2
+                for (int i = 0; i < beadRectangles.size() && foundArticleDivisionIndex == -1; i++)
+                {
+                    PDRectangle rect = beadRectangles.get(i);
+                    if (rect != null)
+                    {
+                        if (rect.contains(x, y))
+                        {
+                            foundArticleDivisionIndex = i * 2 + 1;
+                        }
+                        // NOTE(review): despite the "LeftAndAbove" name this test uses ||, so it
+                        // fires when the position is left of OR above the bead - confirm intent.
+                        else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY())
+                                && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1)
+                        {
+                            notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2;
+                        }
+                        else if (x < rect.getLowerLeftX()
+                                && notFoundButFirstLeftArticleDivisionIndex == -1)
+                        {
+                            notFoundButFirstLeftArticleDivisionIndex = i * 2;
+                        }
+                        else if (y < rect.getUpperRightY()
+                                && notFoundButFirstAboveArticleDivisionIndex == -1)
+                        {
+                            notFoundButFirstAboveArticleDivisionIndex = i * 2;
+                        }
+                    }
+                    else
+                    {
+                        // a null bead rectangle selects division 0 and thereby ends the scan
+                        foundArticleDivisionIndex = 0;
+                    }
+                }
+            }
+            else
+            {
+                // no bead separation: everything goes into division 0
+                foundArticleDivisionIndex = 0;
+            }
+            // pick the division by priority: containing bead, then the left/above fallbacks in the
+            // order below, and finally the last division
+            int articleDivisionIndex;
+            if (foundArticleDivisionIndex != -1)
+            {
+                articleDivisionIndex = foundArticleDivisionIndex;
+            }
+            else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1)
+            {
+                articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex;
+            }
+            else if (notFoundButFirstLeftArticleDivisionIndex != -1)
+            {
+                articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex;
+            }
+            else if (notFoundButFirstAboveArticleDivisionIndex != -1)
+            {
+                articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex;
+            }
+            else
+            {
+                articleDivisionIndex = charactersByArticle.size() - 1;
+            }
+
+            List<TextPosition> textList = charactersByArticle.get(articleDivisionIndex);
+
+            // In the wild, some PDF encoded documents put diacritics (accents on
+            // top of characters) into a separate Tj element. When displaying them
+            // graphically, the two chunks get overlaid. With text output though,
+            // we need to do the overlay. This code recombines the diacritic with
+            // its associated character if the two are consecutive.
+            if (textList.isEmpty())
+            {
+                textList.add(text);
+            }
+            else
+            {
+                // test if we overlap the previous entry.
+                // Note that we are making an assumption that we need to only look back
+                // one TextPosition to find what we are overlapping.
+                // This may not always be true.
+                TextPosition previousTextPosition = textList.get(textList.size() - 1);
+                // the new glyph is a diacritic sitting on the previous glyph: fold it into the previous one
+                if (text.isDiacritic() && previousTextPosition.contains(text))
+                {
+                    previousTextPosition.mergeDiacritic(text);
+                }
+                // If the previous TextPosition was the diacritic, merge it into this
+                // one and remove it from the list.
+                else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition))
+                {
+                    text.mergeDiacritic(previousTextPosition);
+                    textList.remove(textList.size() - 1);
+                    textList.add(text);
+                }
+                else
+                {
+                    textList.add(text);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the page on which text extraction starts. Page numbers are 1-based: for a 5 page PDF document a
+     * start page of 1 extracts all pages, a start page of 4 extracts pages 4 and 5. The default value is 1.
+     *
+     * @return Value of property startPage.
+     */
+    public int getStartPage()
+    {
+        return this.startPage;
+    }
+
+    /**
+     * Sets the first page to be extracted by this class.
+     *
+     * @param startPageValue New value of 1-based startPage property.
+     */
+    public void setStartPage(int startPageValue)
+    {
+        this.startPage = startPageValue;
+    }
+
+    /**
+     * Returns the last page that will be extracted, inclusive. For a 5 page PDF an end page of 5 extracts the
+     * entire document, an end page of 2 extracts pages 1 and 2. This defaults to Integer.MAX_VALUE such that all
+     * pages of the pdf will be extracted.
+     *
+     * @return Value of property endPage.
+     */
+    public int getEndPage()
+    {
+        return this.endPage;
+    }
+
+    /**
+     * Sets the last page to be extracted by this class.
+     *
+     * @param endPageValue New value of 1-based endPage property.
+     */
+    public void setEndPage(int endPageValue)
+    {
+        this.endPage = endPageValue;
+    }
+
+    /**
+     * Sets the line separator used for the output text. If this method is never called, the line.separator
+     * system property is used.
+     *
+     * @param separator The desired line separator string.
+     */
+    public void setLineSeparator(String separator)
+    {
+        this.lineSeparator = separator;
+    }
+
+    /**
+     * Returns the line separator used for the output text.
+     *
+     * @return The desired line separator string.
+     */
+    public String getLineSeparator()
+    {
+        return this.lineSeparator;
+    }
+
+    /**
+     * Returns the word separator used for the output text.
+     *
+     * @return The desired word separator string.
+     */
+    public String getWordSeparator()
+    {
+        return this.wordSeparator;
+    }
+
+    /**
+     * Sets the word separator used for the output text. The PDFBox text extraction algorithm will output the
+     * separator if there is enough space between two words. By default a space character is used. If you need an
+     * accurate count of the characters that are found in a PDF document then you might want to set the word
+     * separator to the empty string.
+     *
+     * @param separator The desired word separator string.
+     */
+    public void setWordSeparator(String separator)
+    {
+        this.wordSeparator = separator;
+    }
+
+    /**
+     * Tells whether duplicate overlapping text is suppressed.
+     *
+     * @return Returns the suppressDuplicateOverlappingText.
+     */
+    public boolean getSuppressDuplicateOverlappingText()
+    {
+        return this.suppressDuplicateOverlappingText;
+    }
+
+    /**
+     * Returns the number of the page that is currently being processed.
+     *
+     * @return A 1 based number representing the current page.
+     */
+    protected int getCurrentPageNo()
+    {
+        return this.currentPageNo;
+    }
+
+    /**
+     * Returns the writer that receives the extracted text.
+     *
+     * @return The stream that output is being written to.
+     */
+    protected Writer getOutput()
+    {
+        return this.output;
+    }
+
+    /**
+     * Returns the character strings grouped by article. It is quite common that there will only be a single
+     * article. The outer list holds one inner list per article division; each inner list holds TextPosition
+     * objects. The internal list itself is returned, not a copy.
+     *
+     * @return A double List of TextPositions for all text strings on the page.
+     */
+    protected List<List<TextPosition>> getCharactersByArticle()
+    {
+        return this.charactersByArticle;
+    }
+
+    /**
+     * Controls the removal of text that overlaps each other. By default the text stripper will attempt to remove
+     * such text; Word, for example, paints the same character several times in order to make it look bold. By
+     * setting this to false all text will be extracted, which means that certain sections will be duplicated, but
+     * better performance will be noticed.
+     *
+     * @param suppressDuplicateOverlappingTextValue The suppressDuplicateOverlappingText to set.
+     */
+    public void setSuppressDuplicateOverlappingText(boolean suppressDuplicateOverlappingTextValue)
+    {
+        this.suppressDuplicateOverlappingText = suppressDuplicateOverlappingTextValue;
+    }
+
+    /**
+     * Tells whether the text stripper separates the text by beads.
+     *
+     * @return If the text will be grouped by beads.
+     */
+    public boolean getSeparateByBeads()
+    {
+        return this.shouldSeparateByBeads;
+    }
+
+    /**
+     * Sets whether the text stripper should group the text output by a list of beads. The default value is true!
+     *
+     * @param aShouldSeparateByBeads The new grouping of beads.
+     */
+    public void setShouldSeparateByBeads(boolean aShouldSeparateByBeads)
+    {
+        this.shouldSeparateByBeads = aShouldSeparateByBeads;
+    }
+
+    /**
+     * Returns the bookmark where text extraction should end, inclusive. Default is null.
+     *
+     * @return The ending bookmark.
+     */
+    public PDOutlineItem getEndBookmark()
+    {
+        return this.endBookmark;
+    }
+
+    /**
+     * Sets the bookmark where the text extraction should stop.
+     *
+     * @param aEndBookmark The ending bookmark.
+     */
+    public void setEndBookmark(PDOutlineItem aEndBookmark)
+    {
+        this.endBookmark = aEndBookmark;
+    }
+
+    /**
+     * Returns the bookmark where text extraction should start, inclusive. Default is null.
+     *
+     * @return The starting bookmark.
+     */
+    public PDOutlineItem getStartBookmark()
+    {
+        return this.startBookmark;
+    }
+
+    /**
+     * Sets the bookmark where text extraction should start, inclusive.
+     *
+     * @param aStartBookmark The starting bookmark.
+     */
+    public void setStartBookmark(PDOutlineItem aStartBookmark)
+    {
+        this.startBookmark = aStartBookmark;
+    }
+
+    /**
+     * Tells whether the text stripper adds some more text formatting.
+     *
+     * @return true if some more text formatting will be added
+     */
+    public boolean getAddMoreFormatting()
+    {
+        return this.addMoreFormatting;
+    }
+
+    /**
+     * Enables or disables additional text formatting. Some additional text formatting is added if
+     * addMoreFormatting is set to true. Default is false.
+     *
+     * @param newAddMoreFormatting Tell PDFBox to add some more text formatting
+     */
+    public void setAddMoreFormatting(boolean newAddMoreFormatting)
+    {
+        this.addMoreFormatting = newAddMoreFormatting;
+    }
+
+    /**
+     * Tells whether the text stripper sorts the text tokens before writing them to the stream.
+     *
+     * @return true If the text tokens will be sorted before being written.
+     */
+    public boolean getSortByPosition()
+    {
+        return this.sortByPosition;
+    }
+
+    /**
+     * Sets whether the text tokens are sorted by position before being processed.<br>
+     * The order of the text tokens in a PDF file may not be the same as they appear visually on the screen. For
+     * example, a PDF writer may write out all text by font, so all bold or larger text, then make a second pass
+     * and write out the normal text. A PDF writer could also choose to write each character in a different
+     * order.<br>
+     * <br>
+     * The default is to <b>not</b> sort by position, due to performance reasons.
+     *
+     * @param newSortByPosition Tell PDFBox to sort the text positions.
+     */
+    public void setSortByPosition(boolean newSortByPosition)
+    {
+        this.sortByPosition = newSortByPosition;
+    }
+
+    /**
+     * Returns the current space width-based tolerance value that is used to estimate where spaces in text should
+     * be added. Note that the default value for this has been determined from trial and error.
+     *
+     * @return The current tolerance / scaling factor
+     */
+    public float getSpacingTolerance()
+    {
+        return this.spacingTolerance;
+    }
+
+    /**
+     * Sets the space width-based tolerance value that is used to estimate where spaces in text should be added.
+     * Note that the default value for this has been determined from trial and error. Setting this value larger
+     * will reduce the number of spaces added.
+     *
+     * @param spacingToleranceValue tolerance / scaling factor to use
+     */
+    public void setSpacingTolerance(float spacingToleranceValue)
+    {
+        this.spacingTolerance = spacingToleranceValue;
+    }
+
+    /**
+     * Returns the current character width-based tolerance value that is used to estimate where spaces in text
+     * should be added. Note that the default value for this has been determined from trial and error.
+     *
+     * @return The current tolerance / scaling factor
+     */
+    public float getAverageCharTolerance()
+    {
+        return this.averageCharTolerance;
+    }
+
+    /**
+     * Sets the character width-based tolerance value that is used to estimate where spaces in text should be
+     * added. Note that the default value for this has been determined from trial and error. Setting this value
+     * larger will reduce the number of spaces added.
+     *
+     * @param averageCharToleranceValue average tolerance / scaling factor to use
+     */
+    public void setAverageCharTolerance(float averageCharToleranceValue)
+    {
+        this.averageCharTolerance = averageCharToleranceValue;
+    }
+
+    /**
+     * Returns the indent threshold: the multiple of whitespace character widths for the current text by which the
+     * current line start can be indented from the previous line start, beyond which the current line start is
+     * considered to be a paragraph start.
+     *
+     * @return the number of whitespace character widths to use when detecting paragraph indents.
+     */
+    public float getIndentThreshold()
+    {
+        return this.indentThreshold;
+    }
+
+    /**
+     * Sets the indent threshold: the multiple of whitespace character widths for the current text by which the
+     * current line start can be indented from the previous line start, beyond which the current line start is
+     * considered to be a paragraph start. The default value is 2.0.
+     *
+     * @param indentThresholdValue the number of whitespace character widths to use when detecting paragraph
+     * indents.
+     */
+    public void setIndentThreshold(float indentThresholdValue)
+    {
+        this.indentThreshold = indentThresholdValue;
+    }
+
+    /**
+     * Returns the drop threshold: the minimum whitespace, as a multiple of the max height of the current
+     * characters, beyond which the current line start is considered to be a paragraph start.
+     *
+     * @return the character height multiple for max allowed whitespace between lines in the same paragraph.
+     */
+    public float getDropThreshold()
+    {
+        return this.dropThreshold;
+    }
+
+    /**
+     * Sets the drop threshold: the minimum whitespace, as a multiple of the max height of the current characters,
+     * beyond which the current line start is considered to be a paragraph start. The default value is 2.5.
+     *
+     * @param dropThresholdValue the character height multiple for max allowed whitespace between lines in the same
+     * paragraph.
+     */
+    public void setDropThreshold(float dropThresholdValue)
+    {
+        this.dropThreshold = dropThresholdValue;
+    }
+
+    /**
+     * Returns the string that is written at the beginning of a paragraph.
+     *
+     * @return the paragraph start string
+     */
+    public String getParagraphStart()
+    {
+        return this.paragraphStart;
+    }
+
+    /**
+     * Sets the string that is written at the beginning of a paragraph.
+     *
+     * @param s the paragraph start string
+     */
+    public void setParagraphStart(String s)
+    {
+        this.paragraphStart = s;
+    }
+
+    /**
+     * Returns the string that is written at the end of a paragraph.
+     *
+     * @return the paragraph end string
+     */
+    public String getParagraphEnd()
+    {
+        return this.paragraphEnd;
+    }
+
+    /**
+     * Sets the string that is written at the end of a paragraph.
+     *
+     * @param s the paragraph end string
+     */
+    public void setParagraphEnd(String s)
+    {
+        this.paragraphEnd = s;
+    }
+
+    /**
+     * Returns the string that is written at the beginning of a page.
+     *
+     * @return the page start string
+     */
+    public String getPageStart()
+    {
+        return this.pageStart;
+    }
+
+    /**
+     * Sets the string that is written at the beginning of a page.
+     *
+     * @param pageStartValue the page start string
+     */
+    public void setPageStart(String pageStartValue)
+    {
+        this.pageStart = pageStartValue;
+    }
+
+    /**
+     * Returns the string that is written at the end of a page.
+     *
+     * @return the page end string
+     */
+    public String getPageEnd()
+    {
+        return this.pageEnd;
+    }
+
+    /**
+     * Sets the string that is written at the end of a page.
+     *
+     * @param pageEndValue the page end string
+     */
+    public void setPageEnd(String pageEndValue)
+    {
+        this.pageEnd = pageEndValue;
+    }
+
+    /**
+     * Returns the string that is written at the beginning of an article.
+     *
+     * @return the article start string
+     */
+    public String getArticleStart()
+    {
+        return this.articleStart;
+    }
+
+    /**
+     * Sets the string that is written at the beginning of an article.
+     *
+     * @param articleStartValue the article start string
+     */
+    public void setArticleStart(String articleStartValue)
+    {
+        this.articleStart = articleStartValue;
+    }
+
+    /**
+     * Returns the string that is written at the end of an article.
+     *
+     * @return the article end string
+     */
+    public String getArticleEnd()
+    {
+        return this.articleEnd;
+    }
+
+    /**
+     * Sets the string that is written at the end of an article.
+     *
+     * @param articleEndValue the article end string
+     */
+    public void setArticleEnd(String articleEndValue)
+    {
+        this.articleEnd = articleEndValue;
+    }
+
+    /**
+     * Handles the separator output for a new line, given the current and previous text positions. The current
+     * position is marked as a line start, checked for being a paragraph start, and the matching line / paragraph
+     * separators are written.
+     *
+     * @param current the current text position
+     * @param lastPosition the previous text position
+     * @param lastLineStartPosition the last text position that followed a line separator.
+     * @param maxHeightForLine max height for positions since lastLineStartPosition
+     * @return the new line start position, which is always <code>current</code>
+     * @throws IOException if something went wrong
+     */
+    private PositionWrapper handleLineSeparation(PositionWrapper current,
+            PositionWrapper lastPosition, PositionWrapper lastLineStartPosition,
+            float maxHeightForLine) throws IOException
+    {
+        current.setLineStart();
+        isParagraphSeparation(current, lastPosition, lastLineStartPosition, maxHeightForLine);
+
+        // plain line break inside a paragraph
+        if (!current.isParagraphStart())
+        {
+            writeLineSeparator();
+            return current;
+        }
+
+        // new paragraph that does not directly follow an article start
+        if (!lastPosition.isArticleStart())
+        {
+            writeLineSeparator();
+            writeParagraphSeparator();
+            return current;
+        }
+
+        // new paragraph right after an article start
+        if (lastPosition.isLineStart())
+        {
+            writeLineSeparator();
+        }
+        writeParagraphStart();
+        return current;
+    }
+
+    /**
+     * tests the relationship between the last text position, the current text position and the last text position that
+     * followed a line separator to decide if the gap represents a paragraph separation. This should <i>only</i> be
+     * called for consecutive text positions that first pass the line separation test.
+     * <p>
+     * This base implementation tests to see if the lastLineStartPosition is null OR if the current vertical position
+     * differs from the last text vertical position by more than the drop threshold times the max line height (2.5 by
+     * default) OR if the current horizontal position is indented by more than the indent threshold times the current
+     * width of a space character (2 by default).
+     * </p>
+     * <p>
+     * This also attempts to identify text that is indented under a hanging indent.
+     * </p>
+     * <p>
+     * This method sets the isParagraphStart and isHangingIndent flags on the current position object. It returns
+     * nothing; the outcome is only visible through those flags.
+     * </p>
+     *
+     * @param position the current text position. This may have its isParagraphStart or isHangingIndent flags set upon
+     * return.
+     * @param lastPosition the previous text position (should not be null).
+     * @param lastLineStartPosition the last text position that followed a line separator, or null.
+     * @param maxHeightForLine max height for text positions since lastLineStartPosition.
+     */
+    private void isParagraphSeparation(PositionWrapper position, PositionWrapper lastPosition,
+            PositionWrapper lastLineStartPosition, float maxHeightForLine)
+    {
+        boolean result = false;
+        if (lastLineStartPosition == null)
+        {
+            // no previous line start known: treat this position as a paragraph start
+            result = true;
+        }
+        else
+        {
+            // absolute vertical distance to the previous position and the threshold it is compared with
+            float yGap = Math.abs(position.getTextPosition().getYDirAdj()
+                    - lastPosition.getTextPosition().getYDirAdj());
+            float newYVal = multiplyFloat(getDropThreshold(), maxHeightForLine);
+            // do we need to flip this for rtl?
+            // signed horizontal offset from the previous line start (positive = further along x)
+            float xGap = position.getTextPosition().getXDirAdj()
+                    - lastLineStartPosition.getTextPosition().getXDirAdj();
+            float newXVal = multiplyFloat(getIndentThreshold(),
+                    position.getTextPosition().getWidthOfSpace());
+            // a quarter of the current position's width, used as the "lined up" tolerance
+            float positionWidth = multiplyFloat(0.25f, position.getTextPosition().getWidth());
+
+            if (yGap > newYVal)
+            {
+                // vertical gap exceeds the drop threshold
+                result = true;
+            }
+            else if (xGap > newXVal)
+            {
+                // text is indented, but try to screen for hanging indent
+                if (!lastLineStartPosition.isParagraphStart())
+                {
+                    result = true;
+                }
+                else
+                {
+                    position.setHangingIndent();
+                }
+            }
+            else if (xGap < -position.getTextPosition().getWidthOfSpace())
+            {
+                // text is left of previous line. Was it a hanging indent?
+                if (!lastLineStartPosition.isParagraphStart())
+                {
+                    result = true;
+                }
+            }
+            else if (Math.abs(xGap) < positionWidth)
+            {
+                // current horizontal position is within 1/4 a char of the last
+                // linestart. We'll treat them as lined up.
+                if (lastLineStartPosition.isHangingIndent())
+                {
+                    position.setHangingIndent();
+                }
+                else if (lastLineStartPosition.isParagraphStart())
+                {
+                    // check to see if the previous line looks like
+                    // any of a number of standard list item formats
+                    Pattern liPattern = matchListItemPattern(lastLineStartPosition);
+                    if (liPattern != null)
+                    {
+                        Pattern currentPattern = matchListItemPattern(position);
+                        // NOTE(review): identity comparison - this relies on both lookups returning
+                        // the very same Pattern instances, i.e. on getListItemPatterns() handing out
+                        // the same list elements on every call; confirm for overriding subclasses.
+                        if (liPattern == currentPattern)
+                        {
+                            result = true;
+                        }
+                    }
+                }
+            }
+        }
+        if (result)
+        {
+            position.setParagraphStart();
+        }
+    }
+
+    private float multiplyFloat(float value1, float value2)
+    {
+        // multiply 2 floats and truncate the resulting value to 3 decimal places
+        // to avoid wrong results when comparing with another float
+        return Math.round(value1 * value2 * 1000) / 1000f;
+    }
+
+    /**
+     * Writes the paragraph separator to the output: the end of the current paragraph followed by the start of
+     * the next one.
+     *
+     * @throws IOException if something went wrong
+     */
+    protected void writeParagraphSeparator() throws IOException
+    {
+        this.writeParagraphEnd();
+        this.writeParagraphStart();
+    }
+
+    /**
+     * Write something (if defined) at the start of a paragraph.
+     * 
+     * @throws IOException if something went wrong
+     */
+    protected void writeParagraphStart() throws IOException
+    {
+        if (inParagraph)
+        {
+            writeParagraphEnd();
+            inParagraph = false;
+        }
+        output.write(getParagraphStart());
+        inParagraph = true;
+    }
+
+    /**
+     * Write something (if defined) at the end of a paragraph.
+     * 
+     * @throws IOException if something went wrong
+     */
+    protected void writeParagraphEnd() throws IOException
+    {
+        if (!inParagraph)
+        {
+            writeParagraphStart();
+        }
+        output.write(getParagraphEnd());
+        inParagraph = false;
+    }
+
+    /**
+     * Write something (if defined) at the start of a page.
+     * 
+     * @throws IOException if something went wrong
+     */
+    protected void writePageStart() throws IOException
+    {
+        output.write(getPageStart());
+    }
+
+    /**
+     * Write something (if defined) at the end of a page.
+     * 
+     * @throws IOException if something went wrong
+     */
+    protected void writePageEnd() throws IOException
+    {
+        output.write(getPageEnd());
+    }
+
+    /**
+     * Returns the list item Pattern that matches the text at the specified PositionWrapper, or null if the text
+     * matches no such pattern. The Patterns tested against are those returned by {@link #getListItemPatterns()}.
+     * To change the list, override that method (if sub-classing) or supply your own list using
+     * {@link #setListItemPatterns(List)}.
+     *
+     * @param pw position
+     * @return the matching pattern
+     */
+    private Pattern matchListItemPattern(PositionWrapper pw)
+    {
+        return matchPattern(pw.getTextPosition().getUnicode(), getListItemPatterns());
+    }
+
+    /**
+     * a list of regular expressions that match commonly used list item formats, i.e. bullets, numbers, letters, Roman
+     * numerals, etc. Not meant to be comprehensive. These are the expressions compiled by
+     * {@link #getListItemPatterns()} when no pattern list has been set.
+     */
+    private static final String[] LIST_ITEM_EXPRESSIONS = { "\\.", "\\d+\\.", "\\[\\d+\\]",
+            "\\d+\\)", "[A-Z]\\.", "[a-z]\\.", "[A-Z]\\)", "[a-z]\\)", "[IVXL]+\\.",
+            "[ivxl]+\\.", };
+
+    /**
+     * The list item patterns currently in use. Stays null until {@link #getListItemPatterns()} fills it from
+     * {@link #LIST_ITEM_EXPRESSIONS} or {@link #setListItemPatterns(List)} replaces it.
+     */
+    private List<Pattern> listOfPatterns = null;
+
+    /**
+     * Supplies a different set of regular expression patterns for matching list item starts. The given list is
+     * stored as is, not copied.
+     *
+     * @param patterns list of patterns
+     */
+    protected void setListItemPatterns(List<Pattern> patterns)
+    {
+        this.listOfPatterns = patterns;
+    }
+
+    /**
+     * Returns a list of regular expression Patterns representing different common list item formats. For example
+     * numbered items of form:
+     * <ol>
+     * <li>some text</li>
+     * <li>more text</li>
+     * </ol>
+     * or
+     * <ul>
+     * <li>some text</li>
+     * <li>more text</li>
+     * </ul>
+     * etc., all begin with some character pattern, such as "\\d+\." (matches "1.", "2.", ...) or "\[\\d+\]"
+     * (matches "[1]", "[2]", ...).
+     * <p>
+     * If no list has been set yet, the default expressions are compiled on the first call and the resulting list
+     * is kept and returned by later calls.
+     *
+     * @return a list of Pattern objects.
+     */
+    protected List<Pattern> getListItemPatterns()
+    {
+        if (listOfPatterns != null)
+        {
+            return listOfPatterns;
+        }
+        List<Pattern> compiled = new ArrayList<Pattern>();
+        for (int i = 0; i < LIST_ITEM_EXPRESSIONS.length; i++)
+        {
+            compiled.add(Pattern.compile(LIST_ITEM_EXPRESSIONS[i]));
+        }
+        listOfPatterns = compiled;
+        return listOfPatterns;
+    }
+
+    /**
+     * Returns the first Pattern of the supplied list that matches the whole of the specified string.
+     * <p>
+     * The list is tried in order, so the most common patterns should come first. Patterns should be strict in
+     * general; each one is applied with the flags it was compiled with.
+     *
+     * @param string the string to be searched
+     * @param patterns list of patterns
+     * @return the first matching pattern, or {@code null} if none matches
+     */
+    protected static Pattern matchPattern(String string, List<Pattern> patterns)
+    {
+        Pattern found = null;
+        for (Pattern candidate : patterns)
+        {
+            if (candidate.matcher(string).matches())
+            {
+                found = candidate;
+                break;
+            }
+        }
+        return found;
+    }
+
+    /**
+     * Emits one line of the document word by word, placing a word separator between consecutive words.
+     *
+     * @param line the words of the line, in output order
+     * @throws IOException if writing a word or a separator fails
+     */
+    private void writeLine(List<WordWithTextPositions> line)
+            throws IOException
+    {
+        boolean needsSeparator = false;
+        for (WordWithTextPositions current : line)
+        {
+            if (needsSeparator)
+            {
+                writeWordSeparator();
+            }
+            writeString(current.getText(), current.getTextPositions());
+            needsSeparator = true;
+        }
+    }
+
+    /**
+     * Splits a line of items into words and normalizes the text of each word.
+     *
+     * @param line the items of the line: text positions interleaved with word separator markers
+     * @return one entry per word, each holding the normalized text and its TextPositions
+     */
+    private List<WordWithTextPositions> normalize(List<LineItem> line)
+    {
+        List<WordWithTextPositions> words = new LinkedList<WordWithTextPositions>();
+        List<TextPosition> pending = new ArrayList<TextPosition>();
+        StringBuilder current = new StringBuilder();
+        for (LineItem entry : line)
+        {
+            current = normalizeAdd(words, current, pending, entry);
+        }
+        // flush the trailing word that was not terminated by a separator
+        if (current.length() == 0)
+        {
+            return words;
+        }
+        words.add(createWord(current.toString(), pending));
+        return words;
+    }
+
+    /**
+     * Handles the LTR and RTL direction of the given word. The quality of the result depends on the input: a full
+     * line gives the best result, whereas single words or characters may end up in the wrong order because of RTL
+     * and LTR marks and characters. Right-to-left runs are reversed code point by code point, so surrogate pairs
+     * stay intact, and mirrored characters found in the mirroring map are replaced by their counterpart.
+     *
+     * Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx
+     *
+     * @param word The word that shall be processed
+     * @return new word with the correct direction of the containing characters
+     */
+    private String handleDirection(String word)
+    {
+        Bidi bidi = new Bidi(word, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
+
+        // if there is pure LTR text no need to process further
+        if (!bidi.isMixed() && bidi.getBaseLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT)
+        {
+            return word;
+        }
+
+        // collect individual bidi information
+        int runCount = bidi.getRunCount();
+        byte[] levels = new byte[runCount];
+        Integer[] runs = new Integer[runCount];
+        for (int i = 0; i < runCount; i++)
+        {
+            levels[i] = (byte) bidi.getRunLevel(i);
+            runs[i] = i;
+        }
+
+        // reorder individual parts based on their levels
+        Bidi.reorderVisually(levels, 0, runs, 0, runCount);
+        // collect the parts based on the direction within the run
+        StringBuilder result = new StringBuilder(word.length());
+        for (int i = 0; i < runCount; i++)
+        {
+            int index = runs[i];
+            int start = bidi.getRunStart(index);
+            int end = bidi.getRunLimit(index);
+            if ((levels[index] & 1) == 0)
+            {
+                // even level: left-to-right run, copied unchanged
+                result.append(word, start, end);
+                continue;
+            }
+            // odd level: right-to-left run, emitted back to front one code point at a time
+            while (end > start)
+            {
+                int codePoint = word.codePointBefore(end);
+                int length = Character.charCount(codePoint);
+                if (end - length < start)
+                {
+                    // the pair would reach across the run start: take the trailing unit on its own
+                    codePoint = word.charAt(end - 1);
+                    length = 1;
+                }
+                end -= length;
+                Character mirrored = null;
+                if (length == 1 && Character.isMirrored(codePoint))
+                {
+                    mirrored = MIRRORING_CHAR_MAP.get((char) codePoint);
+                }
+                if (mirrored != null)
+                {
+                    result.append(mirrored.charValue());
+                }
+                else
+                {
+                    // appendCodePoint writes a supplementary character as a correctly ordered pair
+                    result.appendCodePoint(codePoint);
+                }
+            }
+        }
+        return result.toString();
+    }
+
+    /** Mirrored character mapped to its counterpart; filled once from BidiMirroring.txt during class init. */
+    private static final Map<Character, Character> MIRRORING_CHAR_MAP = new HashMap<Character, Character>();
+
+    static
+    {
+        String path = "/org/apache/pdfbox/resources/text/BidiMirroring.txt";
+        InputStream resource = PDFTextStripper.class.getResourceAsStream(path);
+        if (resource == null)
+        {
+            // a missing resource is reported as null, not as an exception: say so instead of failing on a read
+            LOG.warn("Could not find " + path + ", mirroring char map will be empty");
+        }
+        else
+        {
+            InputStream input = new BufferedInputStream(resource);
+            try
+            {
+                parseBidiFile(input);
+            }
+            catch (IOException e)
+            {
+                LOG.warn("Could not parse BidiMirroring.txt, mirroring char map will be empty: "
+                        + e.getMessage(), e);
+            }
+            finally
+            {
+                try
+                {
+                    input.close();
+                }
+                catch (IOException e)
+                {
+                    LOG.error("Could not close BidiMirroring.txt ", e);
+                }
+            }
+        }
+    }
+
+    /**
+     * Parses the bidi mirroring file provided as inputstream into the mirroring char map. Text after '#' is a
+     * comment; lines without exactly two ';'-separated fields, or with non-hexadecimal fields, are skipped.
+     *
+     * @param inputStream - The bidi file as inputstream
+     * @throws IOException if any line could not be read by the LineNumberReader
+     */
+    private static void parseBidiFile(InputStream inputStream) throws IOException
+    {
+        // a fixed charset keeps the parsing of the hexadecimal fields independent of the platform default
+        LineNumberReader rd = new LineNumberReader(new InputStreamReader(inputStream, "UTF-8"));
+
+        String s;
+        while ((s = rd.readLine()) != null)
+        {
+            int comment = s.indexOf('#'); // ignore comments
+            StringTokenizer st = new StringTokenizer(comment == -1 ? s : s.substring(0, comment), ";");
+            if (st.countTokens() != 2)
+            {
+                continue;
+            }
+
+            try
+            {
+                char source = (char) Integer.parseInt(st.nextToken().trim(), 16);
+                char mirror = (char) Integer.parseInt(st.nextToken().trim(), 16);
+                MIRRORING_CHAR_MAP.put(source, mirror);
+            }
+            catch (NumberFormatException e)
+            {
+                // one malformed entry must not abort class initialization: skip it and keep the remaining pairs
+                LOG.warn("Ignoring malformed line " + rd.getLineNumber() + " of BidiMirroring.txt", e);
+            }
+        }
+    }
+
+    /** Builds one {@link WordWithTextPositions} entry for {@link #normalize(List)}. */
+    private WordWithTextPositions createWord(String word, List<TextPosition> wordPositions)
+    {
+        // only the text is normalized; the positions list is handed over as is, without copying
+        String normalizedText = normalizeWord(word);
+        return new WordWithTextPositions(normalizedText, wordPositions);
+    }
+
+    /**
+     * Normalizes certain Unicode characters of a word, e.g. turns the single "fi" ligature into "f" and "i" and
+     * resolves Arabic and Hebrew presentation forms, then applies the direction handling to the result.
+     *
+     * @param word Word to normalize
+     * @return Normalized word
+     */
+    private String normalizeWord(String word)
+    {
+        final int length = word.length();
+        StringBuilder normalized = null;
+        int copiedUpTo = 0; // start of the stretch of the word that has not been copied yet
+        for (int i = 0; i < length; i++)
+        {
+            char c = word.charAt(i);
+            // Only the Alphabetic and the Arabic A&B Presentation forms are normalized. Applying NFKC
+            // everywhere converts too many things that would cause confusion, for example the micro
+            // symbol in extended Latin would become the value in the Greek script.
+            boolean inPresentationForms = (0xFB00 <= c && c <= 0xFDFF) || (0xFE70 <= c && c <= 0xFEFF);
+            if (!inPresentationForms)
+            {
+                continue;
+            }
+
+            if (normalized == null)
+            {
+                normalized = new StringBuilder(length * 2);
+            }
+            // copy the untouched stretch in front of the current character
+            normalized.append(word, copiedUpTo, i);
+            copiedUpTo = i + 1;
+
+            // Some fonts map U+FDF2 differently than the Unicode spec and add an extra U+0627
+            // character to compensate; after such a character only U+0644 U+0644 U+0647 is appended.
+            if (c == 0xFDF2 && i > 0
+                    && (word.charAt(i - 1) == 0x0627 || word.charAt(i - 1) == 0xFE8D))
+            {
+                normalized.append("\u0644\u0644\u0647");
+            }
+            else
+            {
+                // Trim because some decompositions have an extra space, such as U+FC5E
+                String decomposed = Normalizer.normalize(String.valueOf(c), Normalizer.Form.NFKC);
+                normalized.append(decomposed.trim());
+            }
+        }
+
+        if (normalized == null)
+        {
+            // no character needed normalizing
+            return handleDirection(word);
+        }
+        normalized.append(word, copiedUpTo, length);
+        return handleDirection(normalized.toString());
+    }
+
+    /**
+     * Used within {@link #normalize(List)} to consume one {@link LineItem}: a word separator completes the
+     * current word, any other item extends it.
+     *
+     * @return the StringBuilder the caller must use for the next call (a new one after a completed word)
+     */
+    private StringBuilder normalizeAdd(List<WordWithTextPositions> normalized,
+            StringBuilder lineBuilder, List<TextPosition> wordPositions, LineItem item)
+    {
+        if (!item.isWordSeparator())
+        {
+            // ordinary item: extend the word in progress
+            TextPosition glyph = item.getTextPosition();
+            lineBuilder.append(glyph.getUnicode());
+            wordPositions.add(glyph);
+            return lineBuilder;
+        }
+        // separator: close the word in progress; the positions are copied because the shared list is cleared
+        List<TextPosition> snapshot = new ArrayList<TextPosition>(wordPositions);
+        normalized.add(createWord(lineBuilder.toString(), snapshot));
+        wordPositions.clear();
+        return new StringBuilder();
+    }
+
+    /**
+     * Internal marker class: one entry of a line, holding either a TextPosition or nothing (word separator).
+     */
+    private static final class LineItem
+    {
+        public static LineItem WORD_SEPARATOR = new LineItem();
+        private final TextPosition textPosition;
+
+        // the separator marker carries no TextPosition
+        private LineItem()
+        {
+            this(null);
+        }
+
+        LineItem(TextPosition textPosition)
+        {
+            this.textPosition = textPosition;
+        }
+
+        public static LineItem getWordSeparator()
+        {
+            return WORD_SEPARATOR;
+        }
+
+        public boolean isWordSeparator()
+        {
+            return null == textPosition;
+        }
+
+        public TextPosition getTextPosition()
+        {
+            return textPosition;
+        }
+    }
+
+    /**
+     * Internal class that pairs the text of a word with the {@link TextPosition} entries it was built from. The
+     * number of entries may differ from the number of characters in the text due to normalization.
+     *
+     * @author Axel Dörfler
+     */
+    private static final class WordWithTextPositions
+    {
+        List<TextPosition> textPositions;
+        String text;
+
+        WordWithTextPositions(String word, List<TextPosition> positions)
+        {
+            this.textPositions = positions;
+            this.text = word;
+        }
+
+        public List<TextPosition> getTextPositions()
+        {
+            return this.textPositions;
+        }
+
+        public String getText()
+        {
+            return this.text;
+        }
+    }
+
+    /**
+     * Wrapper of TextPosition that adds flags to track its status, such as being a line start or paragraph start.
+     * <p>
+     * A wrapper is used because the TextPosition class doesn't provide complete access to its state fields to
+     * subclasses, and because TextPosition is conceptually immutable while these flags are set after creation.
+     * Through the methods of this class every flag starts out false and can only be switched on.
+     * </p>
+     *
+     * @author m.martinez@ll.mit.edu
+     */
+    private static final class PositionWrapper
+    {
+        private TextPosition position = null;
+
+        private boolean isArticleStart;
+        private boolean isPageBreak;
+        private boolean isParagraphStart;
+        private boolean isLineStart;
+        private boolean isHangingIndent;
+
+        /**
+         * Creates a wrapper around the specified TextPosition object with all flags cleared.
+         *
+         * @param position the text position to wrap.
+         */
+        PositionWrapper(TextPosition position)
+        {
+            this.position = position;
+        }
+
+        /**
+         * Gives access to the wrapped TextPosition object.
+         *
+         * @return the text position passed to the constructor
+         */
+        public TextPosition getTextPosition()
+        {
+            return this.position;
+        }
+
+        /**
+         * Switches the isArticleStart() flag on.
+         */
+        public void setArticleStart()
+        {
+            isArticleStart = true;
+        }
+
+        public boolean isArticleStart()
+        {
+            return this.isArticleStart;
+        }
+
+        /**
+         * Switches the isPageBreak() flag on.
+         */
+        public void setPageBreak()
+        {
+            isPageBreak = true;
+        }
+
+        public boolean isPageBreak()
+        {
+            return this.isPageBreak;
+        }
+
+        /**
+         * Switches the isParagraphStart() flag on.
+         */
+        public void setParagraphStart()
+        {
+            isParagraphStart = true;
+        }
+
+        public boolean isParagraphStart()
+        {
+            return this.isParagraphStart;
+        }
+
+        /**
+         * Switches the isLineStart() flag on.
+         */
+        public void setLineStart()
+        {
+            isLineStart = true;
+        }
+
+        public boolean isLineStart()
+        {
+            return this.isLineStart;
+        }
+
+        /**
+         * Switches the isHangingIndent() flag on.
+         */
+        public void setHangingIndent()
+        {
+            isHangingIndent = true;
+        }
+
+        public boolean isHangingIndent()
+        {
+            return this.isHangingIndent;
+        }
+    }
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index 41595cc4..1cb6e780 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
@@ -899,7 +899,7 @@ public class RedactionIntegrationTest {
     @Test
     public void redactionTest() throws IOException {
 
-        String fileName = "files/new/VV-511309.pdf";
+        String fileName = "files/new/S416.pdf";
         String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
 
         long start = System.currentTimeMillis();
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/S416.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/S416.pdf
new file mode 100644
index 00000000..fb043424
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/S416.pdf differ