Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d84bda6e5e | ||
|
|
86addc3139 | ||
|
|
78fb6b825b | ||
|
|
ad1e44ca5c | ||
|
|
e99fad0c3d |
@ -29,13 +29,13 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.VisualLayoutParsingAdapter;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.VisualLayoutParsingAdapter;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
|
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
|
||||||
@ -219,7 +219,7 @@ public class LayoutParsingPipeline {
|
|||||||
|
|
||||||
PDDocument originDocument = openDocument(originFile);
|
PDDocument originDocument = openDocument(originFile);
|
||||||
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
|
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
|
||||||
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
|
Map<Integer, List<Ruling>> pdfTableRulings = cvTableParsingAdapter.buildCvParsedRulingsPerPage(tableServiceResponse);
|
||||||
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
||||||
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
||||||
ClassificationDocument classificationDocument = new ClassificationDocument();
|
ClassificationDocument classificationDocument = new ClassificationDocument();
|
||||||
@ -258,7 +258,7 @@ public class LayoutParsingPipeline {
|
|||||||
boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);
|
boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);
|
||||||
|
|
||||||
PDRectangle cropbox = pdPage.getCropBox();
|
PDRectangle cropbox = pdPage.getCropBox();
|
||||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings());
|
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableRulings.get(pageNumber), stripper.getRulings());
|
||||||
|
|
||||||
ClassificationPage classificationPage = switch (layoutParsingType) {
|
ClassificationPage classificationPage = switch (layoutParsingType) {
|
||||||
case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||||
@ -282,7 +282,7 @@ public class LayoutParsingPipeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (signatures.containsKey(pageNumber)) {
|
if (signatures.containsKey(pageNumber)) {
|
||||||
if (classificationPage.getImages() == null || classificationPage.getImages().size() == 0) {
|
if (classificationPage.getImages() == null || classificationPage.getImages().isEmpty()) {
|
||||||
classificationPage.setImages(signatures.get(pageNumber));
|
classificationPage.setImages(signatures.get(pageNumber));
|
||||||
} else {
|
} else {
|
||||||
classificationPage.getImages().addAll(signatures.get(pageNumber));
|
classificationPage.getImages().addAll(signatures.get(pageNumber));
|
||||||
|
|||||||
@ -36,6 +36,7 @@ public class LayoutParsingStorageService {
|
|||||||
private final StorageService storageService;
|
private final StorageService storageService;
|
||||||
private final ObjectMapper objectMapper;
|
private final ObjectMapper objectMapper;
|
||||||
|
|
||||||
|
|
||||||
@Observed(name = "LayoutParsingStorageService", contextualName = "get-origin-file")
|
@Observed(name = "LayoutParsingStorageService", contextualName = "get-origin-file")
|
||||||
public File getOriginFile(String storageId) throws IOException {
|
public File getOriginFile(String storageId) throws IOException {
|
||||||
|
|
||||||
@ -61,28 +62,25 @@ public class LayoutParsingStorageService {
|
|||||||
|
|
||||||
try (InputStream inputStream = getObject(storageId)) {
|
try (InputStream inputStream = getObject(storageId)) {
|
||||||
|
|
||||||
ImageServiceResponse imageServiceResponse = objectMapper.readValue(inputStream, ImageServiceResponse.class);
|
return objectMapper.readValue(inputStream, ImageServiceResponse.class);
|
||||||
inputStream.close();
|
|
||||||
return imageServiceResponse;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public TableServiceResponse getTablesFile(String storageId) throws IOException {
|
public TableServiceResponse getTablesFile(String storageId) throws IOException {
|
||||||
|
|
||||||
try (var tableClassificationStream = getObject(storageId)) {
|
try (InputStream tableClassificationStream = getObject(storageId)) {
|
||||||
|
|
||||||
TableServiceResponse tableServiceResponse = objectMapper.readValue(tableClassificationStream, TableServiceResponse.class);
|
return objectMapper.readValue(tableClassificationStream, TableServiceResponse.class);
|
||||||
tableClassificationStream.close();
|
|
||||||
return tableServiceResponse;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public VisualLayoutParsingResponse getVisualLayoutParsingFile(String storageId) throws IOException {
|
public VisualLayoutParsingResponse getVisualLayoutParsingFile(String storageId) throws IOException {
|
||||||
|
|
||||||
try (InputStream inputStream = getObject(storageId)) {
|
try (InputStream inputStream = getObject(storageId)) {
|
||||||
VisualLayoutParsingResponse visualLayoutParsingResponse = objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class);
|
|
||||||
return visualLayoutParsingResponse;
|
return objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.adapter;
|
package com.knecon.fforesight.service.layoutparser.processor.python_api.adapter;
|
||||||
|
|
||||||
|
import java.awt.geom.Point2D;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -8,8 +9,9 @@ import java.util.Map;
|
|||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.PageInfo;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.PageInfo;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableLine;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
@ -20,33 +22,54 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class CvTableParsingAdapter {
|
public class CvTableParsingAdapter {
|
||||||
|
|
||||||
public Map<Integer, List<TableCells>> buildCvParsedTablesPerPage(TableServiceResponse tableServiceResponse) {
|
private static final double CROOKED_THRESHOLD = 0.2;
|
||||||
|
|
||||||
Map<Integer, List<TableCells>> tableCells = new HashMap<>();
|
|
||||||
|
public Map<Integer, List<Ruling>> buildCvParsedRulingsPerPage(TableServiceResponse tableServiceResponse) {
|
||||||
|
|
||||||
|
Map<Integer, List<Ruling>> rulings = new HashMap<>();
|
||||||
tableServiceResponse.getData()
|
tableServiceResponse.getData()
|
||||||
.forEach(tableData -> tableCells.computeIfAbsent(tableData.getPageInfo().getNumber(), tableCell -> new ArrayList<>())
|
.forEach(tableData -> rulings.computeIfAbsent(tableData.getPageInfo().getNumber(), ruling -> new ArrayList<>())
|
||||||
.addAll(convertTableCells(tableData.getTableCells(), tableData.getPageInfo())));
|
.addAll(convertTableLines(tableData.getTableLines(), tableData.getPageInfo())));
|
||||||
|
|
||||||
return tableCells;
|
return rulings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Collection<TableCells> convertTableCells(List<TableCells> tableCells, PageInfo pageInfo) {
|
private Collection<Ruling> convertTableLines(List<TableLine> tableLines, PageInfo pageInfo) {
|
||||||
|
|
||||||
List<TableCells> cvParsedTableCells = new ArrayList<>();
|
List<Ruling> cvParsedRulings = new ArrayList<>();
|
||||||
|
|
||||||
tableCells.stream()
|
tableLines.forEach(l -> {
|
||||||
.filter(cell -> cell.getWidth() < pageInfo.getWidth() * 0.98 && cell.getHeight() < pageInfo.getHeight() * 0.98)
|
Point2D.Double p1 = new Point2D.Double(l.getX1() * pageInfo.getWidth(), (1 - l.getY1()) * pageInfo.getHeight());
|
||||||
.forEach(t -> cvParsedTableCells.add(TableCells.builder()
|
Point2D.Double p2 = new Point2D.Double(l.getX2() * pageInfo.getWidth(), (1 - l.getY2()) * pageInfo.getHeight());
|
||||||
.y0(t.getY0())
|
|
||||||
.x1(t.getX1())
|
|
||||||
.y1(t.getY1())
|
|
||||||
.x0(t.getX0())
|
|
||||||
.width(t.getWidth())
|
|
||||||
.height(t.getHeight())
|
|
||||||
.build()));
|
|
||||||
|
|
||||||
return cvParsedTableCells;
|
// Determine if line is primarily horizontal or vertical
|
||||||
|
double xDiff = Math.abs(p1.x - p2.x);
|
||||||
|
double yDiff = Math.abs(p1.y - p2.y);
|
||||||
|
|
||||||
|
if (xDiff < yDiff) {
|
||||||
|
if (xDiff / yDiff > CROOKED_THRESHOLD || yDiff > pageInfo.getHeight() * 0.98) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// straighten x-coordinates
|
||||||
|
double avgX = (p1.x + p2.x) / 2;
|
||||||
|
p1.x = avgX;
|
||||||
|
p2.x = avgX;
|
||||||
|
} else {
|
||||||
|
if (yDiff / xDiff > CROOKED_THRESHOLD || xDiff > pageInfo.getWidth() * 0.98) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// straighten y-coordinates
|
||||||
|
double avgY = (p1.y + p2.y) / 2;
|
||||||
|
p1.y = avgY;
|
||||||
|
p2.y = avgY;
|
||||||
|
}
|
||||||
|
|
||||||
|
cvParsedRulings.add(new Ruling(p1, p2));
|
||||||
|
});
|
||||||
|
|
||||||
|
return cvParsedRulings;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -9,7 +9,7 @@ import lombok.NoArgsConstructor;
|
|||||||
@Builder
|
@Builder
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class TableCells {
|
public class TableCell {
|
||||||
|
|
||||||
private float x0;
|
private float x0;
|
||||||
private float y0;
|
private float y0;
|
||||||
@ -15,6 +15,6 @@ import lombok.NoArgsConstructor;
|
|||||||
public class TableData {
|
public class TableData {
|
||||||
|
|
||||||
private PageInfo pageInfo;
|
private PageInfo pageInfo;
|
||||||
private List<TableCells> tableCells = new ArrayList<>();
|
private List<TableLine> tableLines = new ArrayList<>();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,20 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class TableLine {
|
||||||
|
|
||||||
|
private float x1;
|
||||||
|
private float y1;
|
||||||
|
|
||||||
|
private float x2;
|
||||||
|
private float y2;
|
||||||
|
|
||||||
|
}
|
||||||
@ -14,7 +14,6 @@ import org.springframework.stereotype.Service;
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.utils.UnionFind;
|
import com.knecon.fforesight.service.layoutparser.processor.utils.UnionFind;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
@ -31,14 +30,14 @@ public class RulingCleaningService {
|
|||||||
private static final float THRESHOLD_Y_HORIZONTAL = 3;
|
private static final float THRESHOLD_Y_HORIZONTAL = 3;
|
||||||
|
|
||||||
|
|
||||||
public CleanRulings getCleanRulings(List<TableCells> tableCells, List<Ruling> rulings) {
|
public CleanRulings getCleanRulings(List<Ruling> parsedRulings, List<Ruling> rulings) {
|
||||||
|
|
||||||
Rulings verticalAndHorizontalRulingLines;
|
Rulings verticalAndHorizontalRulingLines;
|
||||||
|
|
||||||
if (!rulings.isEmpty()) {
|
if (rulings.isEmpty() && parsedRulings != null) {
|
||||||
verticalAndHorizontalRulingLines = extractVerticalAndHorizontalRulingLines(rulings);
|
verticalAndHorizontalRulingLines = extractVerticalAndHorizontalRulingLines(parsedRulings);
|
||||||
} else {
|
} else {
|
||||||
verticalAndHorizontalRulingLines = getRulingsFromParsedCells(tableCells);
|
verticalAndHorizontalRulingLines = extractVerticalAndHorizontalRulingLines(rulings);
|
||||||
}
|
}
|
||||||
|
|
||||||
verticalAndHorizontalRulingLines.verticalLines.sort(X_FIRST_RULING_COMPARATOR);
|
verticalAndHorizontalRulingLines.verticalLines.sort(X_FIRST_RULING_COMPARATOR);
|
||||||
@ -81,7 +80,7 @@ public class RulingCleaningService {
|
|||||||
Rectangle rectangle2 = rectangles.get(j);
|
Rectangle rectangle2 = rectangles.get(j);
|
||||||
|
|
||||||
// we can stop early when we are too far off because of x-y-sorting
|
// we can stop early when we are too far off because of x-y-sorting
|
||||||
if(rectangle1.getRight() < rectangle2.getLeft() && rectangle1.getBottom() < rectangle2.getTop()) {
|
if (rectangle1.getRight() < rectangle2.getLeft() && rectangle1.getBottom() < rectangle2.getTop()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,83 +158,19 @@ public class RulingCleaningService {
|
|||||||
private Rulings extractVerticalAndHorizontalRulingLines(List<Ruling> rulings) {
|
private Rulings extractVerticalAndHorizontalRulingLines(List<Ruling> rulings) {
|
||||||
|
|
||||||
List<Ruling> vrs = new ArrayList<>();
|
List<Ruling> vrs = new ArrayList<>();
|
||||||
for (Ruling vr : rulings) {
|
List<Ruling> hrs = new ArrayList<>();
|
||||||
if (vr.vertical()) {
|
for (Ruling r : rulings) {
|
||||||
vrs.add(vr);
|
if (r.vertical()) {
|
||||||
|
vrs.add(r);
|
||||||
|
}
|
||||||
|
if (r.horizontal()) {
|
||||||
|
hrs.add(r);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Ruling> hrs = new ArrayList<>();
|
|
||||||
for (Ruling hr : rulings) {
|
|
||||||
if (hr.horizontal()) {
|
|
||||||
hrs.add(hr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return new Rulings(vrs, hrs);
|
return new Rulings(vrs, hrs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Rulings getRulingsFromParsedCells(List<TableCells> tableCells) {
|
|
||||||
|
|
||||||
List<Ruling> vrs = extractVerticalRulingsFromParsedCells(tableCells);
|
|
||||||
List<Ruling> hrs = extractHorizontalRulingsFromParsedCells(tableCells);
|
|
||||||
return new Rulings(vrs, hrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private List<Ruling> extractVerticalRulingsFromParsedCells(List<TableCells> tableCells) {
|
|
||||||
|
|
||||||
List<Ruling> vrs = new ArrayList<>();
|
|
||||||
|
|
||||||
if (tableCells != null) {
|
|
||||||
for (TableCells tableCell : tableCells) {
|
|
||||||
Ruling leftLine = createRuling(tableCell.getX0(), tableCell.getX0(), tableCell.getY0(), tableCell.getY1());
|
|
||||||
Ruling rightLine = createRuling(tableCell.getX1(), tableCell.getX1(), tableCell.getY0(), tableCell.getY1());
|
|
||||||
vrs.add(leftLine);
|
|
||||||
vrs.add(rightLine);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return vrs;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private List<Ruling> extractHorizontalRulingsFromParsedCells(List<TableCells> tableCells) {
|
|
||||||
|
|
||||||
List<Ruling> hrs = new ArrayList<>();
|
|
||||||
|
|
||||||
if (tableCells != null) {
|
|
||||||
for (TableCells tableCell : tableCells) {
|
|
||||||
Ruling topLine = createRuling(tableCell.getX0(), tableCell.getX1(), tableCell.getY1(), tableCell.getY1());
|
|
||||||
Ruling baseLine = createRuling(tableCell.getX0(), tableCell.getX1(), tableCell.getY0(), tableCell.getY0());
|
|
||||||
hrs.add(topLine);
|
|
||||||
hrs.add(baseLine);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return hrs;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private Ruling createRuling(float tableCellX0, float tableCellX1, float tableCellY0, float tableCellY1) {
|
|
||||||
|
|
||||||
float x0 = tableCellX0;
|
|
||||||
float x1 = tableCellX1;
|
|
||||||
float y0 = tableCellY0;
|
|
||||||
float y1 = tableCellY1;
|
|
||||||
|
|
||||||
if (x1 < x0) {
|
|
||||||
x0 = tableCellX1;
|
|
||||||
x1 = tableCellX0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (y1 < y0) {
|
|
||||||
y0 = tableCellY1;
|
|
||||||
y1 = tableCellY0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new Ruling(new Point2D.Float(x0, y0), new Point2D.Float(x1, y1));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private record Rulings(List<Ruling> verticalLines, List<Ruling> horizontalLines) {
|
private record Rulings(List<Ruling> verticalLines, List<Ruling> horizontalLines) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -41,23 +41,24 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Disabled
|
//@Disabled
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testViewerDocumentWithTables() {
|
public void testViewerDocumentWithTables() {
|
||||||
|
|
||||||
String fileName = "files/cv_tables/brokenTablesOnOcr_ocred.pdf";
|
String fileName = "files/cv_tables/VV-331340-first100.pdf";
|
||||||
String tableFileName = "files/cv_tables/brokenTablesOnOcr_ocred.TABLES.json";
|
String tableFileName = "files/cv_tables/VV-331340-first100.TABLES.json";
|
||||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
Path path = Path.of(fileName);
|
||||||
|
String tmpFileName = "/tmp/" + path.getFileName() + "_VIEWER.pdf";
|
||||||
|
|
||||||
var mapper = ObjectMapperFactory.create();
|
var mapper = ObjectMapperFactory.create();
|
||||||
var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), TableServiceResponse.class);
|
var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), TableServiceResponse.class);
|
||||||
var documentFile = new ClassPathResource(fileName).getFile();
|
var documentFile = new ClassPathResource(fileName).getFile();
|
||||||
|
|
||||||
var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE,
|
var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||||
documentFile,
|
documentFile,
|
||||||
new ImageServiceResponse(),
|
new ImageServiceResponse(),
|
||||||
tableResponse,
|
tableResponse,
|
||||||
new VisualLayoutParsingResponse(),Path.of(fileName).getFileName().toFile().toString());
|
new VisualLayoutParsingResponse(), path.getFileName().toFile().toString());
|
||||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
|
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
|
||||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||||
Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
|
Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
|
||||||
@ -65,5 +66,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -28,13 +28,16 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
|
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
|
||||||
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
@ -52,6 +55,9 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Autowired
|
@Autowired
|
||||||
private SectionsBuilderService sectionsBuilderService;
|
private SectionsBuilderService sectionsBuilderService;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private CvTableParsingAdapter cvTableParsingAdapter;
|
||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public ClassificationDocument buildClassificationDocument(File originDocument, TableServiceResponse tableServiceResponse) {
|
public ClassificationDocument buildClassificationDocument(File originDocument, TableServiceResponse tableServiceResponse) {
|
||||||
@ -91,9 +97,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@Disabled
|
||||||
public void tablesToHtmlDebuggerWithCVResponse() throws IOException {
|
public void tablesToHtmlDebuggerWithCVResponse() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.pdf");
|
||||||
|
// the format has changed and this is not up-to-date
|
||||||
ClassPathResource cvTablesResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.TABLES.json");
|
ClassPathResource cvTablesResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.TABLES.json");
|
||||||
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
|
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
|
||||||
|
|
||||||
@ -103,6 +111,25 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void tablesToHtmlDebuggerWithLinesCVResponse() throws IOException {
|
||||||
|
|
||||||
|
String fileName = "files/cv_tables/VV-331340-first100.pdf";
|
||||||
|
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
||||||
|
ClassPathResource cvTablesResource = new ClassPathResource("files/cv_tables/VV-331340-first100.TABLES.json");
|
||||||
|
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
|
||||||
|
|
||||||
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile(), tableServiceResponse);
|
||||||
|
|
||||||
|
Map<Integer, List<Ruling>> rulingsPerPage = cvTableParsingAdapter.buildCvParsedRulingsPerPage(tableServiceResponse);
|
||||||
|
toHtml(document, "/tmp/VV-331340-first100.html");
|
||||||
|
|
||||||
|
PdfDraw.drawLinesPerPage(fileName, rulingsPerPage.values().stream().toList(), lineFileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Disabled
|
@Disabled
|
||||||
@Test
|
@Test
|
||||||
public void testScanRotationBorderIsIgnored() throws IOException {
|
public void testScanRotationBorderIsIgnored() throws IOException {
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user