RED-5381: Fixed calculation of textblocks and body text frame for rotated text and rotated pages
This commit is contained in:
parent
ddbf80e4a6
commit
aa43453206
@ -1,16 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class Page {
|
||||
@ -32,7 +34,8 @@ public class Page {
|
||||
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
|
||||
private double cropBoxArea;
|
||||
private float pageWidth;
|
||||
private float pageHeight;
|
||||
|
||||
|
||||
public boolean isRotated() {
|
||||
|
||||
@ -1,8 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
import com.dslplatform.json.JsonAttribute;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextDirection;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
@ -12,9 +16,6 @@ import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@AllArgsConstructor
|
||||
@Builder
|
||||
@Data
|
||||
@ -50,6 +51,139 @@ public class TextBlock extends AbstractTextContainer {
|
||||
private String classification;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public TextDirection getDir() {
|
||||
|
||||
return sequences.get(0).getDir();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
private float getPageHeight() {
|
||||
|
||||
return sequences.get(0).getPageHeight();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
private float getPageWidth() {
|
||||
|
||||
return sequences.get(0).getPageWidth();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the minX value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the minX value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getPdfMinX() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return minY;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
return getPageWidth() - maxX;
|
||||
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
|
||||
return getPageWidth() - maxY;
|
||||
} else {
|
||||
return minX;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maxX value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the maxX value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getPdfMaxX() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return maxY;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
return getPageWidth() - minX;
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
return getPageWidth() - minY;
|
||||
|
||||
} else {
|
||||
return maxX;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the minY value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the minY value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getPdfMinY() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return minX;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
return maxY;
|
||||
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
return getPageHeight() - maxX;
|
||||
|
||||
} else {
|
||||
return getPageHeight() - maxY;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the maxY value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the maxY value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getPdfMaxY() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return maxX;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
|
||||
return minY;
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
return getPageHeight() - minX;
|
||||
} else {
|
||||
return getPageHeight() - minY;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public TextBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {
|
||||
|
||||
this.minX = minX;
|
||||
@ -97,17 +231,17 @@ public class TextBlock extends AbstractTextContainer {
|
||||
|
||||
public void add(TextPositionSequence r) {
|
||||
|
||||
if (r.getX1() < minX) {
|
||||
minX = r.getX1();
|
||||
if (r.getMinXDirAdj() < minX) {
|
||||
minX = r.getMinXDirAdj();
|
||||
}
|
||||
if (r.getX2() > maxX) {
|
||||
maxX = r.getX2();
|
||||
if (r.getMaxXDirAdj() > maxX) {
|
||||
maxX = r.getMaxXDirAdj();
|
||||
}
|
||||
if (r.getY1() < minY) {
|
||||
minY = r.getY1();
|
||||
if (r.getMinYDirAdj() < minY) {
|
||||
minY = r.getMinYDirAdj();
|
||||
}
|
||||
if (r.getY2() > maxY) {
|
||||
maxY = r.getY2();
|
||||
if (r.getMaxYDirAdj() > maxY) {
|
||||
maxY = r.getMaxYDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
@ -162,7 +296,7 @@ public class TextBlock extends AbstractTextContainer {
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sequences) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
|
||||
@ -14,13 +14,10 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Orientatio
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.utils.RulingTextDirAdjustUtil;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
@ -29,10 +26,17 @@ public class BlockificationService {
|
||||
static final float THRESHOLD = 1f;
|
||||
|
||||
|
||||
/**
|
||||
* This method is building blocks by expanding the minX/maxX and minY/maxY value on each word that is not split by the conditions.
|
||||
* This method must use text direction adjusted positions (DirAdj), where {0,0} is on the upper left. Never try to change this!
|
||||
* Rulings (Table lines) must be adjusted to the text directions as well, when checking if a block is split by a ruling.
|
||||
* @param textPositions The words of a page.
|
||||
* @param horizontalRulingLines Horizontal table lines.
|
||||
* @param verticalRulingLines Vertical table lines.
|
||||
* @return Page object that contains the Textblock and text statistics.
|
||||
*/
|
||||
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
sortRotatedSequences(textPositions);
|
||||
|
||||
List<TextPositionSequence> chunkWords = new ArrayList<>();
|
||||
List<AbstractTextContainer> chunkBlockList1 = new ArrayList<>();
|
||||
|
||||
@ -43,23 +47,15 @@ public class BlockificationService {
|
||||
Float splitX1 = null;
|
||||
for (TextPositionSequence word : textPositions) {
|
||||
|
||||
boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25;
|
||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
|
||||
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
|
||||
boolean splittedByRuling = isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || isSplittedByRuling(minX,
|
||||
minY,
|
||||
word.getX1(),
|
||||
word.getY2(),
|
||||
horizontalRulingLines)
|
||||
boolean lineSeparation = word.getMinYDirAdj() - maxY > word.getHeight() * 1.25;
|
||||
boolean startFromTop = prev != null && word.getMinYDirAdj() < prev.getMinYDirAdj() - prev.getTextHeight();
|
||||
boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj();
|
||||
boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX;
|
||||
boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj();
|
||||
boolean isSpitByRuling = isSpitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
|
||||
boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
|
||||
|
||||
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || isSplittedByRuling(minX,
|
||||
minY,
|
||||
word.getX1(),
|
||||
word.getY2(),
|
||||
verticalRulingLines);
|
||||
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSpitByRuling)) {
|
||||
|
||||
Orientation prevOrientation = null;
|
||||
if (!chunkBlockList1.isEmpty()) {
|
||||
@ -70,15 +66,15 @@ public class BlockificationService {
|
||||
chunkBlockList1.add(cb1);
|
||||
chunkWords = new ArrayList<>();
|
||||
|
||||
if (splitByX && !splittedByRuling) {
|
||||
if (splitByX && !isSpitByRuling) {
|
||||
wasSplitted = true;
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
splitX1 = word.getX1();
|
||||
} else if (newLineAfterSplit && !splittedByRuling) {
|
||||
splitX1 = word.getMinXDirAdj();
|
||||
} else if (newLineAfterSplit && !isSpitByRuling) {
|
||||
wasSplitted = false;
|
||||
cb1.setOrientation(Orientation.RIGHT);
|
||||
splitX1 = null;
|
||||
} else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !splittedByRuling)) {
|
||||
} else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !isSpitByRuling)) {
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
}
|
||||
|
||||
@ -92,17 +88,17 @@ public class BlockificationService {
|
||||
chunkWords.add(word);
|
||||
|
||||
prev = word;
|
||||
if (word.getX1() < minX) {
|
||||
minX = word.getX1();
|
||||
if (word.getMinXDirAdj() < minX) {
|
||||
minX = word.getMinXDirAdj();
|
||||
}
|
||||
if (word.getX2() > maxX) {
|
||||
maxX = word.getX2();
|
||||
if (word.getMaxXDirAdj() > maxX) {
|
||||
maxX = word.getMaxXDirAdj();
|
||||
}
|
||||
if (word.getY1() < minY) {
|
||||
minY = word.getY1();
|
||||
if (word.getMinYDirAdj() < minY) {
|
||||
minY = word.getMinYDirAdj();
|
||||
}
|
||||
if (word.getY2() > maxY) {
|
||||
maxY = word.getY2();
|
||||
if (word.getMaxYDirAdj() > maxY) {
|
||||
maxY = word.getMaxYDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
@ -186,7 +182,7 @@ public class BlockificationService {
|
||||
styleFrequencyCounter.add(wordBlock.getFontStyle());
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock.getRotation());
|
||||
textBlock = new TextBlock(wordBlock.getMinXDirAdj(), wordBlock.getMaxXDirAdj(), wordBlock.getMinYDirAdj(), wordBlock.getMaxYDirAdj(), wordBlockList, wordBlock.getRotation());
|
||||
} else {
|
||||
TextBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
@ -202,17 +198,58 @@ public class BlockificationService {
|
||||
textBlock.setHighestFontSize(fontSizeFrequencyCounter.getHighest());
|
||||
}
|
||||
|
||||
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences().stream().map(t -> round(t.getY1(), 3)).collect(toSet()).size() == 1) {
|
||||
textBlock.getSequences().sort(Comparator.comparing(TextPositionSequence::getX1));
|
||||
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences().stream().map(t -> round(t.getMinYDirAdj(), 3)).collect(toSet()).size() == 1) {
|
||||
textBlock.getSequences().sort(Comparator.comparing(TextPositionSequence::getMinXDirAdj));
|
||||
}
|
||||
return textBlock;
|
||||
}
|
||||
|
||||
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
|
||||
private boolean isSpitByRuling(float minX,
|
||||
float minY,
|
||||
float maxX,
|
||||
float maxY,
|
||||
TextPositionSequence word,
|
||||
List<Ruling> horizontalRulingLines,
|
||||
List<Ruling> verticalRulingLines) {
|
||||
|
||||
return isSplitByRuling(maxX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMinYDirAdj(),
|
||||
verticalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) || isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) || isSplitByRuling(maxX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMinYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) || isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
verticalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight());
|
||||
}
|
||||
|
||||
|
||||
private boolean isSplitByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines, float dir, float pageWidth, float pageHeight) {
|
||||
|
||||
for (Ruling ruling : rulingLines) {
|
||||
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
|
||||
var line = RulingTextDirAdjustUtil.convertToDirAdj(ruling, dir, pageWidth, pageHeight);
|
||||
if (line.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -220,103 +257,6 @@ public class BlockificationService {
|
||||
}
|
||||
|
||||
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
float minY = 10000;
|
||||
float maxY = -100;
|
||||
|
||||
for (Page page : pages) {
|
||||
|
||||
if (page.getTextBlocks().isEmpty() || landscape != page.isLandscape()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (AbstractTextContainer container : page.getTextBlocks()) {
|
||||
|
||||
if (container instanceof TextBlock) {
|
||||
TextBlock textBlock = (TextBlock) container;
|
||||
if (textBlock.getMostPopularWordFont() == null || textBlock.getMostPopularWordStyle() == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float approxLineCount = PositionUtils.getApproxLineCount(textBlock);
|
||||
if (approxLineCount < 2.9f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (documentFontSizeCounter.getMostPopular() != null) {
|
||||
if (textBlock.getMostPopularWordFontSize() >= documentFontSizeCounter.getMostPopular()) {
|
||||
|
||||
if (textBlock.getMinX() < minX) {
|
||||
minX = textBlock.getMinX();
|
||||
}
|
||||
if (textBlock.getMaxX() > maxX) {
|
||||
maxX = textBlock.getMaxX();
|
||||
}
|
||||
if (textBlock.getMinY() < minY) {
|
||||
minY = textBlock.getMinY();
|
||||
}
|
||||
if (textBlock.getMaxY() > maxY) {
|
||||
maxY = textBlock.getMaxY();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (container instanceof Table) {
|
||||
Table table = (Table) container;
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (cell == null || cell.getTextBlocks() == null) {
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
if (textBlock.getMinX() < minX) {
|
||||
minX = textBlock.getMinX();
|
||||
}
|
||||
if (textBlock.getMaxX() > maxX) {
|
||||
maxX = textBlock.getMaxX();
|
||||
}
|
||||
if (textBlock.getMinY() < minY) {
|
||||
minY = textBlock.getMinY();
|
||||
}
|
||||
if (textBlock.getMaxY() > maxY) {
|
||||
maxY = textBlock.getMaxY();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return new Rectangle(minY, minX, maxX - minX, maxY - minY);
|
||||
}
|
||||
|
||||
|
||||
private void sortRotatedSequences(List<TextPositionSequence> sequences) {
|
||||
|
||||
List<TextPositionSequence> rotatedWords = new ArrayList<>();
|
||||
Iterator<TextPositionSequence> itty = sequences.iterator();
|
||||
while (itty.hasNext()) {
|
||||
var pos = itty.next();
|
||||
if (pos.getTextPositions().get(0).getDir() == 270) {
|
||||
rotatedWords.add(pos);
|
||||
itty.remove();
|
||||
}
|
||||
}
|
||||
|
||||
if (!rotatedWords.isEmpty() && !sequences.isEmpty()) {
|
||||
rotatedWords.sort(Comparator.comparing(TextPositionSequence::getX1));
|
||||
}
|
||||
sequences.addAll(rotatedWords);
|
||||
}
|
||||
|
||||
|
||||
private double round(float value, int decimalPoints) {
|
||||
|
||||
var d = Math.pow(10, decimalPoints);
|
||||
|
||||
@ -0,0 +1,171 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
@Service
|
||||
public class BodyTextFrameService {
|
||||
|
||||
/**
|
||||
* Adjusts and sets the body text frame to a page.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
* The aspect ratio of the page is also regarded.
|
||||
*
|
||||
* @param page The page
|
||||
* @param bodyTextFrame frame that contains the main text on portrait pages
|
||||
* @param landscapeBodyTextFrame frame that contains the main text on landscape pages
|
||||
*/
|
||||
public void setBodyTextFrameAdjustedToPage(Page page, Rectangle bodyTextFrame, Rectangle landscapeBodyTextFrame) {
|
||||
|
||||
Rectangle textFrame = page.isLandscape() ? landscapeBodyTextFrame : bodyTextFrame;
|
||||
|
||||
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() == 270) {
|
||||
textFrame = new Rectangle(new Point(textFrame.getTopLeft().getY(), page.getPageHeight() - textFrame.getTopLeft().getX() - textFrame.getWidth()),
|
||||
textFrame.getHeight(),
|
||||
textFrame.getWidth(),
|
||||
0);
|
||||
} else if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
|
||||
textFrame = new Rectangle(new Point(textFrame.getTopLeft().getY(), textFrame.getTopLeft().getX()), textFrame.getHeight(), textFrame.getWidth(), page.getPageNumber());
|
||||
} else if (page.getRotation() == 180) {
|
||||
textFrame = new Rectangle(new Point(textFrame.getTopLeft().getX(), page.getPageHeight() - textFrame.getTopLeft().getY() - textFrame.getHeight()),
|
||||
textFrame.getWidth(),
|
||||
textFrame.getHeight(),
|
||||
0);
|
||||
}
|
||||
page.setBodyTextFrame(textFrame);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the frame that contains the main text, text outside the frame will be e.g. headers or footers.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
* The aspect ratio of the page is also regarded.
|
||||
*
|
||||
* @param pages List of all pages
|
||||
* @param documentFontSizeCounter Statistics of the document
|
||||
* @param landscape Calculate for landscape or portrait
|
||||
* @return Rectangle of the text frame
|
||||
*/
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
float minY = 10000;
|
||||
float maxY = -100;
|
||||
|
||||
for (Page page : pages) {
|
||||
|
||||
if (page.getTextBlocks().isEmpty() || landscape != page.isLandscape()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (AbstractTextContainer container : page.getTextBlocks()) {
|
||||
|
||||
if (container instanceof TextBlock) {
|
||||
TextBlock textBlock = (TextBlock) container;
|
||||
if (textBlock.getMostPopularWordFont() == null || textBlock.getMostPopularWordStyle() == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float approxLineCount = PositionUtils.getApproxLineCount(textBlock);
|
||||
if (approxLineCount < 2.9f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (documentFontSizeCounter.getMostPopular() != null && textBlock.getMostPopularWordFontSize() >= documentFontSizeCounter.getMostPopular()) {
|
||||
|
||||
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
|
||||
if (textBlock.getPdfMinY() < minX) {
|
||||
minX = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxX) {
|
||||
maxX = textBlock.getPdfMaxY();
|
||||
}
|
||||
if (textBlock.getPdfMinX() < minY) {
|
||||
minY = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxY) {
|
||||
maxY = textBlock.getPdfMaxX();
|
||||
}
|
||||
} else {
|
||||
if (textBlock.getPdfMinX() < minX) {
|
||||
minX = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxX) {
|
||||
maxX = textBlock.getPdfMaxX();
|
||||
}
|
||||
if (textBlock.getPdfMinY() < minY) {
|
||||
minY = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxY) {
|
||||
maxY = textBlock.getPdfMaxY();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (container instanceof Table) {
|
||||
Table table = (Table) container;
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (cell == null || cell.getTextBlocks() == null) {
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
|
||||
if (textBlock.getPdfMinY() < minX) {
|
||||
minX = textBlock.getMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxX) {
|
||||
maxX = textBlock.getPdfMaxY();
|
||||
}
|
||||
if (textBlock.getPdfMinX() < minY) {
|
||||
minY = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxY) {
|
||||
maxY = textBlock.getPdfMaxX();
|
||||
}
|
||||
} else {
|
||||
if (textBlock.getPdfMinX() < minX) {
|
||||
minX = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxX) {
|
||||
maxX = textBlock.getPdfMaxX();
|
||||
}
|
||||
if (textBlock.getPdfMinY() < minY) {
|
||||
minY = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxY) {
|
||||
maxY = textBlock.getPdfMaxY();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Rectangle(new Point(minX, minY), maxX - minX, maxY - minY, 0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,76 +1,75 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ClassificationService {
|
||||
|
||||
private final BlockificationService blockificationService;
|
||||
private final BodyTextFrameService bodyTextFrameService;
|
||||
|
||||
|
||||
public void classifyDocument(Document document) {
|
||||
|
||||
Rectangle bodyTextFrame = blockificationService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), false);
|
||||
Rectangle landscapeBodyTextFrame = blockificationService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), true);
|
||||
|
||||
Rectangle bodyTextFrame = bodyTextFrameService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), false);
|
||||
Rectangle landscapeBodyTextFrame = bodyTextFrameService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), true);
|
||||
List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();
|
||||
|
||||
log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
|
||||
|
||||
for (Page page : document.getPages()) {
|
||||
Rectangle btf = page.isLandscape() ? landscapeBodyTextFrame : bodyTextFrame;
|
||||
page.setBodyTextFrame(btf);
|
||||
classifyPage(btf, page, document, headlineFontSizes);
|
||||
bodyTextFrameService.setBodyTextFrameAdjustedToPage(page, bodyTextFrame, landscapeBodyTextFrame);
|
||||
classifyPage(page, document, headlineFontSizes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void classifyPage(Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
|
||||
public void classifyPage(Page page, Document document, List<Float> headlineFontSizes) {
|
||||
|
||||
for (AbstractTextContainer textBlock : page.getTextBlocks()) {
|
||||
if (textBlock instanceof TextBlock) {
|
||||
classifyBlock((TextBlock) textBlock, bodyTextFrame, page, document, headlineFontSizes);
|
||||
classifyBlock((TextBlock) textBlock, page, document, headlineFontSizes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void classifyBlock(TextBlock textBlock, Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
|
||||
public void classifyBlock(TextBlock textBlock, Page page, Document document, List<Float> headlineFontSizes) {
|
||||
|
||||
var bodyTextFrame = page.getBodyTextFrame();
|
||||
|
||||
if (document.getFontSizeCounter().getMostPopular() == null) {
|
||||
textBlock.setClassification("Other");
|
||||
return;
|
||||
}
|
||||
if (PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.isRotated()) && (document.getFontSizeCounter()
|
||||
if (PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.getRotation()) && (document.getFontSizeCounter()
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
textBlock.setClassification("Header");
|
||||
|
||||
} else if (PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock) && (document.getFontSizeCounter()
|
||||
} else if (PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock, page.getRotation()) && (document.getFontSizeCounter()
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
textBlock.setClassification("Footer");
|
||||
} else if (page.getPageNumber() == 1 && (!PositionUtils.isTouchingUnderBodyTextFrame(bodyTextFrame,
|
||||
textBlock) && PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
|
||||
} else if (page.getPageNumber() == 1 && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
|
||||
document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks()
|
||||
.size() == 1)) {
|
||||
if (!Pattern.matches("[0-9]+", textBlock.toString())) {
|
||||
textBlock.setClassification("Title");
|
||||
}
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter()
|
||||
} else if (textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter()
|
||||
.getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle().equals("bold") || !document.getFontStyleCounter()
|
||||
.getCountPerValue()
|
||||
.containsKey("bold") && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1) && textBlock.getSequences()
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.utils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@ -9,17 +9,19 @@ import lombok.experimental.UtilityClass;
|
||||
@SuppressWarnings("all")
|
||||
public class PositionUtils {
|
||||
|
||||
// TODO This currently uses pdf coord system. In the future this should use java coord system.
|
||||
// Note: DirAdj (TextDirection Adjusted) can not be used for this.
|
||||
public boolean isWithinBodyTextFrame(Rectangle btf, TextBlock textBlock) {
|
||||
|
||||
//TODO Currently this is not working for rotated pages.
|
||||
|
||||
if (btf == null || textBlock == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
double threshold = textBlock.getMostPopularWordHeight() * 3;
|
||||
|
||||
if (textBlock.getMinX() + threshold > btf.getX() && textBlock.getMaxX() - threshold < btf.getX() + btf.getWidth() && textBlock.getMinY() + threshold > btf.getY() && textBlock.getMaxY() - threshold < btf.getY() + btf.getHeight()) {
|
||||
if (textBlock.getPdfMinX() + threshold > btf.getTopLeft().getX() && textBlock.getPdfMaxX() - threshold < btf.getTopLeft()
|
||||
.getX() + btf.getWidth() && textBlock.getPdfMinY() + threshold > btf.getTopLeft().getY() && textBlock.getPdfMaxY() - threshold < btf.getTopLeft()
|
||||
.getY() + btf.getHeight()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
@ -28,16 +30,27 @@ public class PositionUtils {
|
||||
}
|
||||
|
||||
|
||||
public boolean isOverBodyTextFrame(Rectangle btf, TextBlock textBlock, boolean rotated) {
|
||||
// TODO This currently uses pdf coord system. In the future this should use java coord system.
|
||||
// Note: DirAdj (TextDirection Adjusted) can not be used for this.
|
||||
public boolean isOverBodyTextFrame(Rectangle btf, TextBlock textBlock, int rotation) {
|
||||
|
||||
if (btf == null || textBlock == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (rotated && textBlock.getMinX() < btf.getX()) {
|
||||
// Its very strange, P{0,0} is on top left in this case, instead of lower left.
|
||||
if (rotation == 90 && textBlock.getPdfMaxX() < btf.getTopLeft().getX()) {
|
||||
return true;
|
||||
} else if (!rotated && textBlock.getMinY() > btf.getY() + btf.getHeight()) {
|
||||
}
|
||||
|
||||
if (rotation == 180 && textBlock.getPdfMaxY() < btf.getTopLeft().getY()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (rotation == 270 && textBlock.getPdfMinX() > btf.getTopLeft().getX() + btf.getWidth()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (rotation == 0 && textBlock.getPdfMinY() > btf.getTopLeft().getY() + btf.getHeight()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
@ -45,16 +58,27 @@ public class PositionUtils {
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean isUnderBodyTextFrame(Rectangle btf, TextBlock textBlock) {
|
||||
|
||||
//TODO Currently this is not working for rotated pages.
|
||||
// TODO This currently uses pdf coord system. In the future this should use java coord system.
|
||||
// Note: DirAdj (TextDirection Adjusted) can not be used for this.
|
||||
public boolean isUnderBodyTextFrame(Rectangle btf, TextBlock textBlock, int rotation) {
|
||||
|
||||
if (btf == null || textBlock == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (textBlock.getMaxY() < btf.getY()) {
|
||||
if (rotation == 90 && textBlock.getPdfMinX() > btf.getTopLeft().getX() + btf.getWidth()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (rotation == 180 && textBlock.getPdfMinY() > btf.getTopLeft().getY() + btf.getHeight()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (rotation == 270 && textBlock.getPdfMaxX() < btf.getTopLeft().getX()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (rotation == 0 && textBlock.getPdfMaxY() < btf.getTopLeft().getY()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
@ -62,7 +86,8 @@ public class PositionUtils {
|
||||
|
||||
}
|
||||
|
||||
|
||||
// TODO This currently uses pdf coord system. In the future this should use java coord system.
|
||||
// Note: DirAdj (TextDirection Adjusted) can not be used for this.
|
||||
public boolean isTouchingUnderBodyTextFrame(Rectangle btf, TextBlock textBlock) {
|
||||
|
||||
//TODO Currently this is not working for rotated pages.
|
||||
@ -71,7 +96,7 @@ public class PositionUtils {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (textBlock.getMinY() < btf.getY()) {
|
||||
if (textBlock.getMinY() < btf.getTopLeft().getY()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.utils;
|
||||
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class RulingTextDirAdjustUtil {
|
||||
|
||||
/**
|
||||
* Converts a ruling (line of a table) the same way TextPositions are converted in PDFBox.
|
||||
* This will get the y position of the text, adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
*
|
||||
* See org.apache.pdfbox.text.TextPosition
|
||||
*/
|
||||
public Line2D.Float convertToDirAdj(Ruling ruling, float dir, float pageWidth, float pageHeight) {
|
||||
|
||||
return new Line2D.Float(convertPoint(ruling.x1, ruling.y1, dir, pageWidth, pageHeight), convertPoint(ruling.x2, ruling.y2, dir, pageWidth, pageHeight));
|
||||
}
|
||||
|
||||
|
||||
private Point2D convertPoint(float x, float y, float dir, float pageWidth, float pageHeight) {
|
||||
|
||||
var xAdj = getXRot(x, y, dir, pageWidth, pageHeight);
|
||||
var yAdj = 0f;
|
||||
if (dir == 0 || dir == 180) {
|
||||
yAdj = pageHeight - getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
|
||||
} else {
|
||||
yAdj = pageWidth - getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
|
||||
}
|
||||
return new Point2D.Float(xAdj, yAdj);
|
||||
}
|
||||
|
||||
|
||||
private float getXRot(float x, float y, float dir, float pageWidth, float pageHeight) {
|
||||
|
||||
if (dir == 0) {
|
||||
return x;
|
||||
} else if (dir == 90) {
|
||||
return y;
|
||||
} else if (dir == 180) {
|
||||
return pageWidth - x;
|
||||
} else if (dir == 270) {
|
||||
return pageHeight - y;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
private float getYLowerLeftRot(float x, float y, float dir, float pageWidth, float pageHeight) {
|
||||
|
||||
if (dir == 0) {
|
||||
return y;
|
||||
} else if (dir == 90) {
|
||||
return pageWidth - x;
|
||||
} else if (dir == 180) {
|
||||
return pageHeight - y;
|
||||
} else if (dir == 270) {
|
||||
return x;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
@ -47,16 +47,6 @@ public class TextPositionSequence implements CharSequence {
|
||||
}
|
||||
|
||||
|
||||
public TextPositionSequence fromData(List<RedTextPosition> textPositions, int page) {
|
||||
|
||||
var textPositionSequence = new TextPositionSequence();
|
||||
textPositionSequence.textPositions = textPositions;
|
||||
textPositionSequence.page = page;
|
||||
|
||||
return textPositionSequence;
|
||||
}
|
||||
|
||||
|
||||
public TextPositionSequence(List<TextPosition> textPositions, int page) {
|
||||
|
||||
this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList());
|
||||
@ -147,59 +137,63 @@ public class TextPositionSequence implements CharSequence {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted minX value
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getX1() {
|
||||
public float getMinXDirAdj() {
|
||||
|
||||
return textPositions.get(0).getXDirAdj();
|
||||
|
||||
if (rotation == 90) {
|
||||
return textPositions.get(0).getYDirAdj() - getTextHeight();
|
||||
} else {
|
||||
return textPositions.get(0).getXDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted maxX value
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getX2() {
|
||||
public float getMaxXDirAdj() {
|
||||
|
||||
return textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + HEIGHT_PADDING;
|
||||
|
||||
if (rotation == 90) {
|
||||
return textPositions.get(0).getYDirAdj();
|
||||
} else {
|
||||
return textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + HEIGHT_PADDING;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted minY value. The upper border of the bounding box of the word.
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getRotationAdjustedY() {
|
||||
public float getMinYDirAdj() {
|
||||
|
||||
return textPositions.get(0).getYDirAdj() - getTextHeight();
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted maxY value. The lower border of the bounding box of the word.
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getMaxYDirAdj() {
|
||||
|
||||
return textPositions.get(0).getYDirAdj();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getY1() {
|
||||
|
||||
if (rotation == 90) {
|
||||
return textPositions.get(0).getXDirAdj();
|
||||
} else {
|
||||
return pageHeight - textPositions.get(0).getYDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getY2() {
|
||||
|
||||
if (rotation == 90) {
|
||||
return textPositions.get(textPositions.size() - 1).getXDirAdj() + getTextHeight() - HEIGHT_PADDING;
|
||||
} else {
|
||||
return pageHeight - textPositions.get(0).getYDirAdj() + getTextHeight();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -215,7 +209,7 @@ public class TextPositionSequence implements CharSequence {
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getHeight() {
|
||||
|
||||
return getY2() - getY1();
|
||||
return getMaxYDirAdj() - getMinYDirAdj();
|
||||
}
|
||||
|
||||
|
||||
@ -223,7 +217,7 @@ public class TextPositionSequence implements CharSequence {
|
||||
@JsonAttribute(ignore = true)
|
||||
public float getWidth() {
|
||||
|
||||
return getX2() - getX1();
|
||||
return getMaxXDirAdj() - getMinXDirAdj();
|
||||
}
|
||||
|
||||
|
||||
@ -270,6 +264,15 @@ public class TextPositionSequence implements CharSequence {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This returns the bounding box of the word in Pdf Coordinate System where {0,0} is rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return bounding box of the word in Pdf Coordinate System
|
||||
*/
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
@SneakyThrows
|
||||
|
||||
@ -33,7 +33,7 @@ public class CellValue {
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : textBlock.getSequences()) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
|
||||
@ -194,7 +194,7 @@ public class SearchableText {
|
||||
for (TextPositionSequence word : sequences) {
|
||||
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
@ -228,7 +228,7 @@ public class SearchableText {
|
||||
for (TextPositionSequence word : sorted) {
|
||||
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
@ -249,7 +249,7 @@ public class SearchableText {
|
||||
for (TextPositionSequence word : sequences) {
|
||||
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
|
||||
@ -43,12 +43,13 @@ public class SectionGridCreatorService {
|
||||
|
||||
if (textBlock instanceof TextBlock) {
|
||||
|
||||
TextBlock tb = (TextBlock) textBlock;
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()),
|
||||
textBlock.getWidth(),
|
||||
textBlock.getHeight(),
|
||||
.add(new SectionRectangle(new Point(tb.getPdfMinX(), tb.getPdfMinY()),
|
||||
tb.getPdfMaxX() - tb.getPdfMinX(),
|
||||
tb.getPdfMaxY() - tb.getPdfMinY(),
|
||||
i + 1,
|
||||
paragraph.getPageBlocks().size(),
|
||||
null));
|
||||
|
||||
@ -292,9 +292,9 @@ public class EntitySearchUtils {
|
||||
.get(0)
|
||||
.getSequences()
|
||||
.get(0)
|
||||
.getX1() && image.getPosition().getX() + image.getPosition().getWidth() > entity.getPositionSequences().get(0).getSequences().get(0).getX2() && image.getPosition()
|
||||
.getY() < entity.getPositionSequences().get(0).getSequences().get(0).getY1() && image.getPosition().getY() + image.getPosition()
|
||||
.getHeight() > entity.getPositionSequences().get(0).getSequences().get(0).getY2();
|
||||
.getMinXDirAdj() && image.getPosition().getX() + image.getPosition().getWidth() > entity.getPositionSequences().get(0).getSequences().get(0).getMaxXDirAdj() && image.getPosition()
|
||||
.getY() < entity.getPositionSequences().get(0).getSequences().get(0).getMinYDirAdj() && image.getPosition().getY() + image.getPosition()
|
||||
.getHeight() > entity.getPositionSequences().get(0).getSequences().get(0).getMaxYDirAdj();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -99,24 +99,22 @@ public class PdfSegmentationService {
|
||||
stripper.getText(pdDocument);
|
||||
|
||||
PDRectangle pdr = pdPage.getMediaBox();
|
||||
boolean isLandscape = pdr.getWidth() > pdr.getHeight();
|
||||
|
||||
int rotation = pdPage.getRotation();
|
||||
boolean isRotated = rotation != 0 && rotation != 360;
|
||||
boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);
|
||||
|
||||
PDRectangle cropbox = pdPage.getCropBox();
|
||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber),
|
||||
stripper.getRulings(),
|
||||
stripper.getMinCharWidth(),
|
||||
stripper.getMaxCharHeight());
|
||||
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
|
||||
PDRectangle cropbox = pdPage.getCropBox();
|
||||
float cropboxArea = cropbox.getHeight() * cropbox.getWidth();
|
||||
page.setCropBoxArea(cropboxArea);
|
||||
|
||||
page.setRotation(rotation);
|
||||
page.setLandscape(isLandscape || isRotated);
|
||||
page.setLandscape(isLandscape);
|
||||
page.setPageNumber(pageNumber);
|
||||
page.setPageWidth(cropbox.getWidth());
|
||||
page.setPageHeight(cropbox.getHeight());
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
buildPageStatistics(page);
|
||||
|
||||
@ -4,6 +4,7 @@ import com.dslplatform.json.JsonAttribute;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
@ -34,6 +35,12 @@ public abstract class AbstractTextContainer {
|
||||
public abstract String getText();
|
||||
|
||||
|
||||
public boolean containsBlock(TextBlock other) {
|
||||
|
||||
return this.minX <= other.getPdfMinX() && this.maxX >= other.getPdfMaxX() && this.minY >= other.getPdfMinY() && this.maxY <= other.getPdfMaxY();
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(AbstractTextContainer other) {
|
||||
|
||||
return this.minX <= other.minX && this.maxX >= other.maxX && this.minY >= other.minY && this.maxY <= other.maxY;
|
||||
|
||||
@ -51,7 +51,7 @@ public class Cell extends Rectangle {
|
||||
|
||||
for (TextPositionSequence word : textBlock.getSequences()) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
|
||||
@ -1,15 +1,27 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
@Service
|
||||
public class TableExtractionService {
|
||||
@ -54,6 +66,19 @@ public class TableExtractionService {
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Finds tables on a page and moves textblocks into cells of the found tables.
|
||||
* Note: This algorithm uses Pdf Coordinate System where {0,0} is rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* DirAdj (Text direction adjusted) values can not be used here.
|
||||
*
|
||||
* @param cleanRulings The lines used to build the table.
|
||||
* @param page Page object that contains textblocks and statistics.
|
||||
*/
|
||||
public void extractTables(CleanRulings cleanRulings, Page page) {
|
||||
|
||||
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
@ -63,7 +88,10 @@ public class TableExtractionService {
|
||||
for (AbstractTextContainer abstractTextContainer : page.getTextBlocks()) {
|
||||
TextBlock textBlock = (TextBlock) abstractTextContainer;
|
||||
for (Cell cell : cells) {
|
||||
if (cell.intersects(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight())) {
|
||||
if (cell.intersects(textBlock.getPdfMinX(),
|
||||
textBlock.getPdfMinY(),
|
||||
textBlock.getPdfMaxX() - textBlock.getPdfMinX(),
|
||||
textBlock.getPdfMaxY() - textBlock.getPdfMinY())) {
|
||||
cell.addTextBlock(textBlock);
|
||||
toBeRemoved.add(textBlock);
|
||||
break;
|
||||
@ -94,7 +122,7 @@ public class TableExtractionService {
|
||||
Iterator<AbstractTextContainer> itty = page.getTextBlocks().iterator();
|
||||
while (itty.hasNext()) {
|
||||
AbstractTextContainer textBlock = itty.next();
|
||||
if (table.contains(textBlock) && position == -1) {
|
||||
if (textBlock instanceof TextBlock ? table.containsBlock((TextBlock) textBlock) : table.contains(textBlock) && position == -1) {
|
||||
position = page.getTextBlocks().indexOf(textBlock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
@ -11,16 +21,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -79,10 +79,11 @@ public class PdfVisualisationService {
|
||||
}
|
||||
|
||||
contentStream.setStrokingColor(Color.YELLOW);
|
||||
contentStream.addRect((float) analyzedPage.getBodyTextFrame().getX(),
|
||||
(float) analyzedPage.getBodyTextFrame().getY(),
|
||||
(float) analyzedPage.getBodyTextFrame().getWidth(),
|
||||
(float) analyzedPage.getBodyTextFrame().getHeight());
|
||||
contentStream.addRect(analyzedPage.getBodyTextFrame().getTopLeft().getX(),
|
||||
analyzedPage.getBodyTextFrame().getTopLeft().getY(),
|
||||
analyzedPage.getBodyTextFrame().getWidth(),
|
||||
analyzedPage.getBodyTextFrame().getHeight());
|
||||
|
||||
contentStream.stroke();
|
||||
|
||||
contentStream.close();
|
||||
@ -94,20 +95,39 @@ public class PdfVisualisationService {
|
||||
|
||||
contentStream.setStrokingColor(Color.RED);
|
||||
|
||||
contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
|
||||
contentStream.addRect(textBlock.getPdfMinX(), textBlock.getPdfMinY(), textBlock.getPdfMaxX() - textBlock.getPdfMinX(), textBlock.getPdfMaxY() - textBlock.getPdfMinY());
|
||||
contentStream.stroke();
|
||||
|
||||
if (textBlock.getClassification() != null) {
|
||||
contentStream.beginText();
|
||||
|
||||
contentStream.setNonStrokingColor(Color.BLUE);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 12f);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 9f);
|
||||
|
||||
contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
|
||||
|
||||
contentStream.showText(textBlock.getClassification() + textBlock.getOrientation());
|
||||
contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMaxY() + 2);
|
||||
contentStream.showText(textBlock.getClassification() + textBlock.getOrientation() + "-->" + textBlock.getSequences().get(0).getDir());
|
||||
|
||||
contentStream.endText();
|
||||
|
||||
contentStream.setNonStrokingColor(Color.BLUE);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 2f);
|
||||
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMinY());
|
||||
// contentStream.showText("MinX,MinY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMinY() + ")");
|
||||
// contentStream.endText();
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMinY());
|
||||
// contentStream.showText("MaxX,MinY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMinY() + ")");
|
||||
// contentStream.endText();
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMaxY());
|
||||
// contentStream.showText("MinX,MaxY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMaxY() + ")");
|
||||
// contentStream.endText();
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMaxY());
|
||||
// contentStream.showText("MaxX,MaxY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMaxY() + ")");
|
||||
// contentStream.endText();
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,7 +144,10 @@ public class PdfVisualisationService {
|
||||
|
||||
contentStream.setStrokingColor(Color.GREEN);
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
|
||||
contentStream.addRect(textBlock.getPdfMinX(),
|
||||
textBlock.getPdfMinY(),
|
||||
textBlock.getPdfMaxX() - textBlock.getPdfMinX(),
|
||||
textBlock.getPdfMaxY() - textBlock.getPdfMinY());
|
||||
contentStream.stroke();
|
||||
}
|
||||
}
|
||||
|
||||
@ -155,9 +155,8 @@ public class HeadlinesGoldStandardIntegrationTest {
|
||||
|
||||
System.out.println("Precision is: " + precision + " recall is: " + recall);
|
||||
|
||||
Assertions.assertThat(precision).isGreaterThanOrEqualTo(0.45f);
|
||||
Assertions.assertThat(precision).isGreaterThanOrEqualTo(0.44f);
|
||||
Assertions.assertThat(recall).isGreaterThanOrEqualTo(0.69f);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -364,7 +364,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
|
||||
AnalyzeRequest request = prepareStorage("files/RSS/06 - Isopyrazam - Acute Oral Toxicity Rat.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1098,7 +1098,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("classificationTest");
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
AnalyzeRequest request = prepareStorage("files/new/RotateTestFile.pdf");
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.dossierId(request.getDossierId())
|
||||
|
||||
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user