Pull request #259: RED-2536
Merge in RED/redaction-service from RED-2536 to master * commit 'd892d6e81ee12927e57dc46c6439a65276824896': RED-2536: Treat \t same as whitespace RED-2223: Fixed strange ends of text position sequences that lead to wrong whitespaces
This commit is contained in:
commit
65b186be28
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||
|
||||
import static java.util.stream.Collectors.toSet;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
@ -16,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
@ -25,9 +28,12 @@ public class BlockificationService {
|
||||
|
||||
static final float THRESHOLD = 1f;
|
||||
|
||||
|
||||
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines,
|
||||
List<Ruling> verticalRulingLines) {
|
||||
|
||||
sortRotatedSequences(textPositions);
|
||||
|
||||
List<TextPositionSequence> chunkWords = new ArrayList<>();
|
||||
List<AbstractTextContainer> chunkBlockList1 = new ArrayList<>();
|
||||
|
||||
@ -50,7 +56,7 @@ public class BlockificationService {
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
|
||||
|
||||
Orientation prevOrientation = null;
|
||||
if(!chunkBlockList1.isEmpty()) {
|
||||
if (!chunkBlockList1.isEmpty()) {
|
||||
prevOrientation = chunkBlockList1.get(chunkBlockList1.size() - 1).getOrientation();
|
||||
}
|
||||
|
||||
@ -62,15 +68,11 @@ public class BlockificationService {
|
||||
wasSplitted = true;
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
splitX1 = word.getX1();
|
||||
} else
|
||||
|
||||
if (newLineAfterSplit && !splittedByRuling) {
|
||||
} else if (newLineAfterSplit && !splittedByRuling) {
|
||||
wasSplitted = false;
|
||||
cb1.setOrientation(Orientation.RIGHT);
|
||||
splitX1 = null;
|
||||
} else
|
||||
|
||||
if(prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !splittedByRuling)){
|
||||
} else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !splittedByRuling)) {
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
}
|
||||
|
||||
@ -110,16 +112,18 @@ public class BlockificationService {
|
||||
while (itty.hasNext()) {
|
||||
TextBlock block = (TextBlock) itty.next();
|
||||
|
||||
if(previousLeft != null && block.getOrientation().equals(Orientation.LEFT)){
|
||||
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft.getMinY()){
|
||||
if (previousLeft != null && block.getOrientation().equals(Orientation.LEFT)) {
|
||||
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft
|
||||
.getMinY()) {
|
||||
previousLeft.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if(previousRight != null && block.getOrientation().equals(Orientation.RIGHT)){
|
||||
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight.getMinY()){
|
||||
if (previousRight != null && block.getOrientation().equals(Orientation.RIGHT)) {
|
||||
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight
|
||||
.getMinY()) {
|
||||
previousRight.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
@ -133,16 +137,16 @@ public class BlockificationService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
itty = chunkBlockList1.iterator();
|
||||
TextBlock previous = null;
|
||||
while (itty.hasNext()) {
|
||||
TextBlock block = (TextBlock) itty.next();
|
||||
|
||||
if(previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous
|
||||
.getMaxY())||
|
||||
previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous
|
||||
.getMaxY())){
|
||||
if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation()
|
||||
.equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY()) || previous != null && previous
|
||||
.getOrientation()
|
||||
.equals(Orientation.LEFT) && block.getOrientation()
|
||||
.equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY())) {
|
||||
previous.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
@ -151,11 +155,12 @@ public class BlockificationService {
|
||||
previous = block;
|
||||
}
|
||||
|
||||
|
||||
return new Page(chunkBlockList1);
|
||||
}
|
||||
|
||||
private boolean equalsWithThreshold(float f1, float f2){
|
||||
|
||||
private boolean equalsWithThreshold(float f1, float f2) {
|
||||
|
||||
return Math.abs(f1 - f2) < THRESHOLD;
|
||||
}
|
||||
|
||||
@ -197,6 +202,13 @@ public class BlockificationService {
|
||||
textBlock.setHighestFontSize(fontSizeFrequencyCounter.getHighest());
|
||||
}
|
||||
|
||||
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences()
|
||||
.stream()
|
||||
.map(t -> round(t.getY1(), 3))
|
||||
.collect(toSet())
|
||||
.size() == 1) {
|
||||
textBlock.getSequences().sort(Comparator.comparing(TextPositionSequence::getX1));
|
||||
}
|
||||
return textBlock;
|
||||
}
|
||||
|
||||
@ -291,4 +303,30 @@ public class BlockificationService {
|
||||
return new Rectangle(minY, minX, maxX - minX, maxY - minY);
|
||||
}
|
||||
|
||||
|
||||
private void sortRotatedSequences(List<TextPositionSequence> sequences) {
|
||||
|
||||
List<TextPositionSequence> rotatedWords = new ArrayList<>();
|
||||
Iterator<TextPositionSequence> itty = sequences.iterator();
|
||||
while (itty.hasNext()) {
|
||||
var pos = itty.next();
|
||||
if (pos.getTextPositions().get(0).getDir() == 270) {
|
||||
rotatedWords.add(pos);
|
||||
itty.remove();
|
||||
}
|
||||
}
|
||||
|
||||
if (!rotatedWords.isEmpty() && !sequences.isEmpty()) {
|
||||
rotatedWords.sort(Comparator.comparing(TextPositionSequence::getX1));
|
||||
}
|
||||
sequences.addAll(rotatedWords);
|
||||
}
|
||||
|
||||
|
||||
private double round(float value, int decimalPoints) {
|
||||
|
||||
var d = Math.pow(10, decimalPoints);
|
||||
return Math.round(value * d) / d;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,16 +1,33 @@
|
||||
package com.iqser.red.service.redaction.v1.server.parsing;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.reflect.FieldUtils;
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||
import org.apache.pdfbox.contentstream.operator.color.*;
|
||||
import org.apache.pdfbox.contentstream.operator.state.*;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
|
||||
import org.apache.pdfbox.cos.COSBase;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
@ -23,12 +40,14 @@ import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
@ -189,16 +208,19 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
COSName objectName = (COSName) arguments.get(0);
|
||||
PDXObject xobject = getResources().getXObject(objectName);
|
||||
if (xobject instanceof PDImageXObject) {
|
||||
PDImageXObject image = (PDImageXObject)xobject;
|
||||
PDImageXObject image = (PDImageXObject) xobject;
|
||||
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
|
||||
|
||||
Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew.getScaleY());
|
||||
Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew
|
||||
.getScaleY());
|
||||
|
||||
// Memory hack - SoftReference kills me
|
||||
FieldUtils.writeField(image, "cachedImageSubsampling", -1, true);
|
||||
|
||||
if (rect.getHeight() > 2 && rect.getWidth() > 2) {
|
||||
this.images.add(new PdfImage(image.getImage(), rect, pageNumber, image.getImage().getColorModel().hasAlpha()));
|
||||
this.images.add(new PdfImage(image.getImage(), rect, pageNumber, image.getImage()
|
||||
.getColorModel()
|
||||
.hasAlpha()));
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
@ -207,8 +229,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
private float floatValue(COSBase value) {
|
||||
|
||||
if (value instanceof COSNumber) {
|
||||
@ -247,8 +267,16 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
|
||||
|
||||
int startIndex = 0;
|
||||
RedTextPosition previous = null;
|
||||
|
||||
for (int i = 0; i <= textPositions.size() - 1; i++) {
|
||||
|
||||
if (!textPositionSequences.isEmpty()) {
|
||||
previous = textPositionSequences.get(textPositionSequences.size() - 1)
|
||||
.getTextPositions()
|
||||
.get(textPositionSequences.get(textPositionSequences.size() - 1).getTextPositions().size() - 1);
|
||||
}
|
||||
|
||||
int charWidth = (int) textPositions.get(i).getWidthDirAdj();
|
||||
if (charWidth < minCharWidth) {
|
||||
minCharWidth = charWidth;
|
||||
@ -267,42 +295,54 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
|
||||
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0"))) {
|
||||
.equals("\u00A0") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\t"))) {
|
||||
startIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strange, but sometimes a character starts to the left of its predecessor, for example in Metolachlor2.pdf
|
||||
if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) {
|
||||
if (i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj()) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i)
|
||||
.getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")) && i <= textPositions.size() - 2) {
|
||||
.equals("\u00A0") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\t")) && i <= textPositions.size() - 2) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
|
||||
|
||||
// Remove false sequence ends (whitespaces)
|
||||
if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
|
||||
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
|
||||
for (TextPosition t : sublist) {
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(t);
|
||||
}
|
||||
} else {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
}
|
||||
startIndex = i + 1;
|
||||
}
|
||||
@ -311,13 +351,23 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
|
||||
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) {
|
||||
.equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0") || sublist.get(sublist.size() - 1).getUnicode().equals("\t"))) {
|
||||
sublist = sublist.subList(0, sublist.size() - 1);
|
||||
}
|
||||
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
.equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
|
||||
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
|
||||
for (TextPosition t : sublist) {
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(t);
|
||||
}
|
||||
} else {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
}
|
||||
super.writeString(text);
|
||||
}
|
||||
|
||||
@ -22,6 +22,7 @@ public class RedTextPosition {
|
||||
private float width;
|
||||
private float heightDir;
|
||||
private float widthDirAdj;
|
||||
private float dir;
|
||||
|
||||
// not used in reanalysis
|
||||
@JsonIgnore
|
||||
|
||||
@ -138,6 +138,13 @@ public class TextPositionSequence implements CharSequence {
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getRotationAdjustedX() {
|
||||
|
||||
return textPositions.get(0).getXDirAdj();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getY1() {
|
||||
|
||||
@ -235,7 +242,7 @@ public class TextPositionSequence implements CharSequence {
|
||||
float posYInit;
|
||||
float posYEnd;
|
||||
|
||||
if (textPositions.get(0).getRotation() == 90) {
|
||||
if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() != 0.0f) {
|
||||
posXEnd = textPositions.get(0).getYDirAdj() + 2;
|
||||
posYInit = getY1();
|
||||
posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
|
||||
@ -246,17 +253,24 @@ public class TextPositionSequence implements CharSequence {
|
||||
posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2;
|
||||
posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1)
|
||||
.getYDirAdj() + height;
|
||||
} else if(textPositions.get(0).getRotation() == 0 && textPositions.stream().map(t -> t.getY()).collect(toSet()).size() > 1) {
|
||||
} else if(textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 270f) {
|
||||
posYInit = textPositions.get(0).getPageHeight() - getX1();
|
||||
posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0)
|
||||
.getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3;
|
||||
posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2;
|
||||
posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1)
|
||||
.getYDirAdj() + height;
|
||||
} else if(textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 0.0f){
|
||||
posXInit = textPositions.get(textPositions.size() - 1)
|
||||
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getHeightDir();
|
||||
posXEnd = textPositions.get(0).getXDirAdj();
|
||||
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
|
||||
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1)
|
||||
.getYDirAdj() + 2;
|
||||
}
|
||||
else {
|
||||
posXEnd = textPositions.get(textPositions.size() - 1)
|
||||
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
|
||||
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1;
|
||||
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
|
||||
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1)
|
||||
.getYDirAdj() + 2;
|
||||
|
||||
@ -133,7 +133,7 @@ public class RedactionLogCreatorService {
|
||||
int startIndex = 0;
|
||||
for (int i = 1; i < textPositions.size(); i++) {
|
||||
float yDirAdj = textPositions.get(i).getYDirAdj();
|
||||
if (yDirAdj != y) {
|
||||
if (round(yDirAdj,3) != round(y, 3)) {
|
||||
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page)
|
||||
.getRectangle());
|
||||
y = yDirAdj;
|
||||
@ -149,6 +149,11 @@ public class RedactionLogCreatorService {
|
||||
return rectangles;
|
||||
}
|
||||
|
||||
private double round(float value, int decimalPoints) {
|
||||
var d = Math.pow(10, decimalPoints);
|
||||
return Math.round(value * d) / d;
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(Entity entity, String dossierTemplateId) {
|
||||
|
||||
|
||||
@ -653,7 +653,7 @@ public class RedactionIntegrationTest {
|
||||
public void redactionTest() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/VV-919901.pdf");
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
@ -886,7 +886,7 @@ public class RedactionIntegrationTest {
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
System.out.println("classificationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
@ -908,7 +908,7 @@ public class RedactionIntegrationTest {
|
||||
public void sectionsTest() throws IOException {
|
||||
|
||||
System.out.println("sectionsTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
@ -930,7 +930,7 @@ public class RedactionIntegrationTest {
|
||||
public void htmlTablesTest() throws IOException {
|
||||
|
||||
System.out.println("htmlTablesTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
@ -1195,10 +1195,10 @@ public class RedactionIntegrationTest {
|
||||
|
||||
private static String getTemporaryDirectory() {
|
||||
|
||||
String tmpdir = System.getProperty("java.io.tmpdir");
|
||||
if (StringUtils.isNotBlank(tmpdir)) {
|
||||
return tmpdir;
|
||||
}
|
||||
// String tmpdir = System.getProperty("java.io.tmpdir");
|
||||
// if (StringUtils.isNotBlank(tmpdir)) {
|
||||
// return tmpdir;
|
||||
// }
|
||||
return "/tmp";
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
Amendment 1
|
||||
Report Number: 33168
|
||||
Page
|
||||
Report Number: BFI0714
|
||||
Tesh Consultants International
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user